diff --git "a/profile_trace/iteration_10752/rank1_trace.json" "b/profile_trace/iteration_10752/rank1_trace.json" new file mode 100644--- /dev/null +++ "b/profile_trace/iteration_10752/rank1_trace.json" @@ -0,0 +1,157273 @@ + +{ + "schemaVersion": 1, + "deviceProperties": [ + { + "id": 0, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 1, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 2, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 3, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 4, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 5, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 6, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 7, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + } + ], + "cupti_version": 22, + "cuda_runtime_version": 12040, + "cuda_driver_version": 12080, + "distributedInfo": {"backend": "nccl", "rank": 1, "world_size": 8, "pg_count": 1, "pg_config": [{"pg_name": "0", "pg_desc": "default_pg", "backend_config": "cuda:nccl", "pg_size": 8, "ranks": [0, 1, 2, 3, 4, 5, 6, 7]}], "nccl_version": "2.21.5"}, + "record_shapes": 1, + "trace_id": "4AEBD239083C47119A603482B3D556C4", + "traceEvents": [ + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450720425.697, "dur": 155.011, + "args": { + "External id": 457217,"Record function id": 0, "Sequence number": 5036592, "Fwd thread id": 1, "Ev Idx": 0 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450720451.834, "dur": 116.753, + "args": { + "External id": 457218,"Sequence number": 5036592, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 1 + } + }, + { + "ph": "f", "id": 1, "pid": 2338707, "tid": 2379456, "ts": 6267450720451.834, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338707, "tid": 2379456, + "ts": 6267450720463.119, "dur": 103.028, + "args": { + "External id": 457219,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 2 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450720613.851, "dur": 303.666, + "args": { + "External id": 457220,"Record function id": 0, "Ev Idx": 3 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450720694.350, "dur": 121.698, + "args": { + "External id": 457221,"Record function id": 0, "Ev Idx": 4 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2338707, "tid": 2379456, + "ts": 6267450720738.573, "dur": 63.508, + "args": { + "External id": 457222,"Record function id": 0, "Ev Idx": 5 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450720822.428, "dur": 2.440, + "args": { + "External id": 457223,"Sequence number": 5036591, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 6 + } + }, + { + "ph": "f", "id": 2, "pid": 2338707, "tid": 2379456, "ts": 6267450720822.428, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450720831.548, "dur": 78.539, + "args": { + "External id": 457224,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 7 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450720844.185, "dur": 65.147, + "args": { + "External id": 457225,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 8 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450720858.982, "dur": 4.568, + "args": { + "External id": 457226,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450720929.268, "dur": 37362.720, + "args": { + "External id": 457227,"Record function id": 0, "Sequence number": 5036589, "Fwd thread id": 1, "Ev Idx": 10 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450720931.651, "dur": 37339.816, + "args": { + "External id": 457228,"Sequence number": 5036589, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11 + } + }, + { + "ph": "f", "id": 3, "pid": 2338707, "tid": 2379456, "ts": 6267450720931.651, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450720992.071, "dur": 4.657, + "args": { + "External id": 457229,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450721004.298, "dur": 36860.480, + "args": { + "External id": 457230,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450721007.134, "dur": 36857.246, + "args": { + "External id": 457231,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 14 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450721013.596, "dur": 6.345, + "args": { + "External id": 457232,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450721022.892, "dur": 36839.263, + "args": { + "External id": 457233,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338707, "tid": 2379456, + "ts": 6267450757870.894, "dur": 0.653, + "args": { + "External id": 457234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 17 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267450757874.788, "dur": 3.741, + "args": { + "External id": 457235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 18 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267450757877.011, "dur": 1.370, + "args": { + "External id": 457236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2379456, + "ts": 6267450757886.381, "dur": 37.733, + "args": { + "External id": 457237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338707, "tid": 2379456, + "ts": 6267450757934.649, "dur": 56.117, + "args": { + "External id": 457238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2379456, + "ts": 6267450757937.003, "dur": 53.514, + "args": { + "External id": 457239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 22 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2379456, + "ts": 6267450757939.274, "dur": 50.677, + "args": { + "External id": 457240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 23 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450758311.922, "dur": 27.080, + "args": { + "External id": 457241,"Record function id": 0, "Sequence number": 5036588, "Fwd thread id": 1, "Ev Idx": 24 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450758314.888, "dur": 19.174, + "args": { + "External id": 457242,"Sequence number": 5036588, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 25 + } + }, + { + "ph": "f", "id": 4, "pid": 2338707, "tid": 2379456, "ts": 6267450758314.888, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450758321.284, "dur": 12.417, + "args": { + "External id": 457243,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 26 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450758326.445, "dur": 6.997, + "args": { + "External id": 457244,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 27 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450758346.030, "dur": 153.319, + "args": { + "External id": 457245,"Record function id": 0, "Sequence number": 5036587, "Fwd thread id": 1, "Ev Idx": 28 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450758347.541, "dur": 141.509, + "args": { + "External id": 457246,"Sequence number": 5036587, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 29 + } + }, + { + "ph": "f", "id": 5, "pid": 2338707, "tid": 2379456, "ts": 6267450758347.541, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450758351.922, "dur": 136.427, + "args": { + "External id": 457247,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 30 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450758359.546, "dur": 52.464, + "args": { + "External id": 457248,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 31 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450758365.795, "dur": 9.207, + "args": { + "External id": 457249,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 32 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450758378.147, "dur": 33.388, + "args": { + "External id": 457250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 33 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450758382.284, "dur": 28.653, + "args": { + "External id": 457251,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 34 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450758418.022, "dur": 5.905, + "args": { + "External id": 457252,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 35 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450758421.570, "dur": 1.874, + "args": { + "External id": 457253,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 36 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450758425.933, "dur": 61.245, + "args": { + "External id": 457254,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 37 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450758505.385, "dur": 106.706, + "args": { + "External id": 457255,"Record function id": 0, "Sequence number": 5036586, "Fwd thread id": 1, "Ev Idx": 38 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450758507.133, "dur": 97.610, + "args": { + "External id": 457256,"Sequence number": 5036586, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 39 + } + }, + { + "ph": "f", "id": 6, "pid": 2338707, "tid": 2379456, "ts": 6267450758507.133, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450758510.894, "dur": 80.251, + "args": { + "External id": 457257,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 40 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450758516.682, "dur": 31.280, + "args": { + "External id": 457258,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 41 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450758518.358, "dur": 3.670, + "args": { + "External id": 457259,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 42 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450758523.416, "dur": 24.100, + "args": { + "External id": 457260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 43 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450758527.079, "dur": 19.938, + "args": { + "External id": 457261,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 44 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267450758550.368, "dur": 13.038, + "args": { + "External id": 457262,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 45 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450758558.014, "dur": 4.440, + "args": { + "External id": 457263,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 46 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450758564.349, "dur": 25.990, + "args": { + "External id": 457264,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 47 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450758619.892, "dur": 271.765, + "args": { + "External id": 457265,"Record function id": 0, "Sequence number": 5036585, "Fwd thread id": 1, "Ev Idx": 48 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450758621.469, "dur": 264.715, + "args": { + "External id": 457266,"Sequence number": 5036585, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 49 + } + }, + { + "ph": "f", "id": 7, "pid": 2338707, "tid": 2379456, "ts": 6267450758621.469, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450758627.043, "dur": 258.557, + "args": { + "External id": 457267,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 50 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450758630.552, "dur": 26.476, + "args": { + "External id": 457268,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 51 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450758631.948, "dur": 4.503, + "args": { + "External id": 457269,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 52 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450758637.244, "dur": 19.451, + "args": { + "External id": 457270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 53 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450758638.572, "dur": 17.723, + "args": { + "External id": 457271,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 54 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450758658.410, "dur": 3.449, + "args": { + "External id": 457272,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 55 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450758660.078, "dur": 1.504, + "args": { + "External id": 457273,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 56 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450758666.327, "dur": 217.936, + "args": { + "External id": 457274,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 57 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450758897.562, "dur": 103.792, + "args": { + "External id": 457275,"Record function id": 0, "Sequence number": 5036584, "Fwd thread id": 1, "Ev Idx": 58 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450758898.904, "dur": 97.180, + "args": { + "External id": 457276,"Sequence number": 5036584, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 59 + } + }, + { + "ph": "f", "id": 8, "pid": 2338707, "tid": 2379456, "ts": 6267450758898.904, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450758900.945, "dur": 94.494, + "args": { + "External id": 457277,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 60 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450758904.908, "dur": 21.442, + "args": { + "External id": 457278,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 61 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450758906.162, "dur": 2.622, + "args": { + "External id": 457279,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 62 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450758909.644, "dur": 16.393, + "args": { + "External id": 457280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 63 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450758910.826, "dur": 14.817, + "args": { + "External id": 457281,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 64 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450758927.437, "dur": 5.280, + "args": { + "External id": 457282,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 65 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450758931.962, "dur": 0.614, + "args": { + "External id": 457283,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 66 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450758933.844, "dur": 60.721, + "args": { + "External id": 457284,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 67 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450759009.875, "dur": 105.787, + "args": { + "External id": 457285,"Record function id": 0, "Sequence number": 5036583, "Fwd thread id": 1, "Ev Idx": 68 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450759011.992, "dur": 1.451, + "args": { + "External id": 457286,"Sequence number": 5036583, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 69 + } + }, + { + "ph": "f", "id": 9, "pid": 2338707, "tid": 2379456, "ts": 6267450759011.992, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450759017.272, "dur": 91.350, + "args": { + "External id": 457287,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 70 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450759019.922, "dur": 87.547, + "args": { + "External id": 457288,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 71 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450759032.027, "dur": 0.704, + "args": { + "External id": 457289,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 72 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450759125.239, "dur": 2299.145, + "args": { + "External id": 457290,"Record function id": 0, "Sequence number": 5036581, "Fwd thread id": 1, "Ev Idx": 73 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450759127.211, "dur": 2244.913, + "args": { + "External id": 457291,"Sequence number": 5036581, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 74 + } + }, + { + "ph": "f", "id": 10, "pid": 2338707, "tid": 2379456, "ts": 6267450759127.211, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450759177.302, "dur": 4.008, + "args": { + "External id": 457292,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 75 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450759184.460, "dur": 1908.245, + "args": { + "External id": 457293,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 76 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450759186.564, "dur": 1905.750, + "args": { + "External id": 457294,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 77 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450759189.964, "dur": 5.922, + "args": { + "External id": 457295,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 78 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450759197.195, "dur": 1893.794, + "args": { + "External id": 457296,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 79 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338707, "tid": 2379456, + "ts": 6267450761096.922, "dur": 0.501, + "args": { + "External id": 457297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 80 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761099.494, "dur": 6.137, + "args": { + "External id": 457298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 81 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761104.190, "dur": 1.266, + "args": { + "External id": 457299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 82 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2379456, + "ts": 6267450761111.184, "dur": 32.272, + "args": { + "External id": 457300,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 83 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338707, "tid": 2379456, + "ts": 6267450761150.920, "dur": 50.238, + "args": { + "External id": 457301,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 84 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2379456, + "ts": 6267450761152.290, "dur": 48.591, + "args": { + "External id": 457302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 85 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2379456, + "ts": 6267450761153.765, "dur": 46.641, + "args": { + "External id": 457303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 86 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761389.735, "dur": 27.640, + "args": { + "External id": 457304,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 87 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450761437.336, "dur": 19.581, + "args": { + "External id": 457305,"Record function id": 0, "Sequence number": 5036580, "Fwd thread id": 1, "Ev Idx": 88 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450761439.231, "dur": 14.150, + "args": { + "External id": 457306,"Sequence number": 5036580, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 89 + } + }, + { + "ph": "f", "id": 11, "pid": 2338707, "tid": 2379456, "ts": 6267450761439.231, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450761445.854, "dur": 7.241, + "args": { + "External id": 457307,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 90 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450761447.619, "dur": 5.199, + "args": { + "External id": 457308,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 91 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450761461.525, "dur": 110.698, + "args": { + "External id": 457309,"Record function id": 0, "Sequence number": 5036579, "Fwd thread id": 1, "Ev Idx": 92 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450761462.843, "dur": 102.501, + "args": { + "External id": 457310,"Sequence number": 5036579, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 93 + } + }, + { + "ph": "f", "id": 12, "pid": 2338707, "tid": 2379456, "ts": 6267450761462.843, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450761465.158, "dur": 99.545, + "args": { + "External id": 457311,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 94 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450761473.633, "dur": 34.140, + "args": { + "External id": 457312,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 95 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450761475.855, "dur": 4.400, + "args": { + "External id": 457313,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 96 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761481.709, "dur": 25.737, + "args": { + "External id": 457314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 97 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761484.457, "dur": 22.316, + "args": { + "External id": 457315,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 98 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450761509.736, "dur": 5.353, + "args": { + "External id": 457316,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 99 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450761512.969, "dur": 1.709, + "args": { + "External id": 457317,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761517.174, "dur": 46.502, + "args": { + "External id": 457318,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450761577.697, "dur": 94.685, + "args": { + "External id": 457319,"Record function id": 0, "Sequence number": 5036578, "Fwd thread id": 1, "Ev Idx": 102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450761579.114, "dur": 88.651, + "args": { + "External id": 457320,"Sequence number": 5036578, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 103 + } + }, + { + "ph": "f", "id": 13, "pid": 2338707, "tid": 2379456, "ts": 6267450761579.114, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450761584.603, "dur": 82.675, + "args": { + "External id": 457321,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450761586.688, "dur": 46.040, + "args": { + "External id": 457322,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450761587.709, "dur": 2.562, + "args": { + "External id": 457323,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761608.882, "dur": 23.449, + "args": { + "External id": 457324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761610.908, "dur": 20.812, + "args": { + "External id": 457325,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267450761634.365, "dur": 9.476, + "args": { + "External id": 457326,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450761641.668, "dur": 1.494, + "args": { + "External id": 457327,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761645.017, "dur": 21.351, + "args": { + "External id": 457328,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450761681.756, "dur": 159.467, + "args": { + "External id": 457329,"Record function id": 0, "Sequence number": 5036577, "Fwd thread id": 1, "Ev Idx": 112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450761683.472, "dur": 153.013, + "args": { + "External id": 457330,"Sequence number": 5036577, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 113 + } + }, + { + "ph": "f", "id": 14, "pid": 2338707, "tid": 2379456, "ts": 6267450761683.472, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450761685.842, "dur": 150.147, + "args": { + "External id": 457331,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450761687.198, "dur": 22.318, + "args": { + "External id": 457332,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450761688.340, "dur": 3.092, + "args": { + "External id": 457333,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761694.588, "dur": 14.604, + "args": { + "External id": 457334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761696.109, "dur": 12.616, + "args": { + "External id": 457335,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450761710.855, "dur": 5.092, + "args": { + "External id": 457336,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450761714.712, "dur": 0.874, + "args": { + "External id": 457337,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761716.969, "dur": 117.933, + "args": { + "External id": 457338,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450761846.652, "dur": 135.827, + "args": { + "External id": 457339,"Record function id": 0, "Sequence number": 5036576, "Fwd thread id": 1, "Ev Idx": 122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450761848.069, "dur": 110.103, + "args": { + "External id": 457340,"Sequence number": 5036576, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 123 + } + }, + { + "ph": "f", "id": 15, "pid": 2338707, "tid": 2379456, "ts": 6267450761848.069, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450761852.318, "dur": 105.382, + "args": { + "External id": 457341,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450761853.632, "dur": 24.489, + "args": { + "External id": 457342,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450761854.574, "dur": 2.755, + "args": { + "External id": 457343,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761858.250, "dur": 19.566, + "args": { + "External id": 457344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761862.419, "dur": 14.917, + "args": { + "External id": 457345,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450761879.262, "dur": 2.627, + "args": { + "External id": 457346,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450761880.853, "dur": 0.829, + "args": { + "External id": 457347,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761882.743, "dur": 74.125, + "args": { + "External id": 457348,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450761963.003, "dur": 17.518, + "args": { + "External id": 457349,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450761988.091, "dur": 47.205, + "args": { + "External id": 457350,"Record function id": 0, "Sequence number": 5036575, "Fwd thread id": 1, "Ev Idx": 133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450761989.779, "dur": 1.221, + "args": { + "External id": 457351,"Sequence number": 5036575, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 134 + } + }, + { + "ph": "f", "id": 16, "pid": 2338707, "tid": 2379456, "ts": 6267450761989.779, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450761994.106, "dur": 35.633, + "args": { + "External id": 457352,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450761996.574, "dur": 32.672, + "args": { + "External id": 457353,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450762007.609, "dur": 0.973, + "args": { + "External id": 457354,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450762041.574, "dur": 3376.845, + "args": { + "External id": 457355,"Record function id": 0, "Sequence number": 5036573, "Fwd thread id": 1, "Ev Idx": 138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450762045.944, "dur": 3329.678, + "args": { + "External id": 457356,"Sequence number": 5036573, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 139 + } + }, + { + "ph": "f", "id": 17, "pid": 2338707, "tid": 2379456, "ts": 6267450762045.944, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450762127.303, "dur": 4.455, + "args": { + "External id": 457357,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450762134.874, "dur": 2982.483, + "args": { + "External id": 457358,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450762139.536, "dur": 2977.381, + "args": { + "External id": 457359,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450762142.833, "dur": 5.066, + "args": { + "External id": 457360,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450762148.904, "dur": 2966.383, + "args": { + "External id": 457361,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338707, "tid": 2379456, + "ts": 6267450765121.564, "dur": 0.411, + "args": { + "External id": 457362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765123.681, "dur": 3.097, + "args": { + "External id": 457363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765125.214, "dur": 1.420, + "args": { + "External id": 457364,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2379456, + "ts": 6267450765132.828, "dur": 28.586, + "args": { + "External id": 457365,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338707, "tid": 2379456, + "ts": 6267450765170.857, "dur": 45.931, + "args": { + "External id": 457366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2379456, + "ts": 6267450765172.589, "dur": 43.953, + "args": { + "External id": 457367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2379456, + "ts": 6267450765174.029, "dur": 42.119, + "args": { + "External id": 457368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765391.335, "dur": 20.293, + "args": { + "External id": 457369,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450765431.633, "dur": 20.049, + "args": { + "External id": 457370,"Record function id": 0, "Sequence number": 5036572, "Fwd thread id": 1, "Ev Idx": 153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450765436.949, "dur": 11.093, + "args": { + "External id": 457371,"Sequence number": 5036572, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 154 + } + }, + { + "ph": "f", "id": 18, "pid": 2338707, "tid": 2379456, "ts": 6267450765436.949, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450765440.514, "dur": 7.227, + "args": { + "External id": 457372,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450765442.318, "dur": 5.104, + "args": { + "External id": 457373,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450765455.964, "dur": 91.795, + "args": { + "External id": 457374,"Record function id": 0, "Sequence number": 5036571, "Fwd thread id": 1, "Ev Idx": 157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450765457.427, "dur": 83.018, + "args": { + "External id": 457375,"Sequence number": 5036571, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 158 + } + }, + { + "ph": "f", "id": 19, "pid": 2338707, "tid": 2379456, "ts": 6267450765457.427, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450765462.052, "dur": 77.977, + "args": { + "External id": 457376,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450765465.517, "dur": 26.335, + "args": { + "External id": 457377,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450765467.324, "dur": 3.768, + "args": { + "External id": 457378,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765472.250, "dur": 19.267, + "args": { + "External id": 457379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765474.346, "dur": 16.587, + "args": { + "External id": 457380,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450765493.812, "dur": 7.343, + "args": { + "External id": 457381,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450765499.389, "dur": 1.384, + "args": { + "External id": 457382,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765505.124, "dur": 33.933, + "args": { + "External id": 457383,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450765552.918, "dur": 83.590, + "args": { + "External id": 457384,"Record function id": 0, "Sequence number": 5036570, "Fwd thread id": 1, "Ev Idx": 167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450765554.523, "dur": 76.507, + "args": { + "External id": 457385,"Sequence number": 5036570, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 168 + } + }, + { + "ph": "f", "id": 20, "pid": 2338707, "tid": 2379456, "ts": 6267450765554.523, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450765557.570, "dur": 73.060, + "args": { + "External id": 457386,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450765559.747, "dur": 20.070, + "args": { + "External id": 457387,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450765560.910, "dur": 3.405, + "args": { + "External id": 457388,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765565.198, "dur": 14.316, + "args": { + "External id": 457389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765566.065, "dur": 13.038, + "args": { + "External id": 457390,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267450765581.262, "dur": 26.354, + "args": { + "External id": 457391,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450765590.809, "dur": 15.295, + "args": { + "External id": 457392,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765609.265, "dur": 20.602, + "args": { + "External id": 457393,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450765643.225, "dur": 143.672, + "args": { + "External id": 457394,"Record function id": 0, "Sequence number": 5036569, "Fwd thread id": 1, "Ev Idx": 177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450765644.724, "dur": 137.319, + "args": { + "External id": 457395,"Sequence number": 5036569, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 178 + } + }, + { + "ph": "f", "id": 21, "pid": 2338707, "tid": 2379456, "ts": 6267450765644.724, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450765646.570, "dur": 134.984, + "args": { + "External id": 457396,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450765648.196, "dur": 26.090, + "args": { + "External id": 457397,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450765649.098, "dur": 5.672, + "args": { + "External id": 457398,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765655.645, "dur": 18.276, + "args": { + "External id": 457399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765656.676, "dur": 16.712, + "args": { + "External id": 457400,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450765675.677, "dur": 5.416, + "args": { + "External id": 457401,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450765679.846, "dur": 0.909, + "args": { + "External id": 457402,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765684.742, "dur": 95.688, + "args": { + "External id": 457403,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450765792.603, "dur": 117.626, + "args": { + "External id": 457404,"Record function id": 0, "Sequence number": 5036568, "Fwd thread id": 1, "Ev Idx": 187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450765794.188, "dur": 93.908, + "args": { + "External id": 457405,"Sequence number": 5036568, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 188 + } + }, + { + "ph": "f", "id": 22, "pid": 2338707, "tid": 2379456, "ts": 6267450765794.188, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450765798.779, "dur": 88.760, + "args": { + "External id": 457406,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450765800.050, "dur": 18.898, + "args": { + "External id": 457407,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450765801.138, "dur": 2.632, + "args": { + "External id": 457408,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765804.743, "dur": 13.855, + "args": { + "External id": 457409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765805.852, "dur": 12.338, + "args": { + "External id": 457410,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450765819.896, "dur": 4.410, + "args": { + "External id": 457411,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450765823.556, "dur": 0.540, + "args": { + "External id": 457412,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765827.895, "dur": 58.412, + "args": { + "External id": 457413,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450765893.554, "dur": 15.150, + "args": { + "External id": 457414,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450765918.873, "dur": 42.540, + "args": { + "External id": 457415,"Record function id": 0, "Sequence number": 5036567, "Fwd thread id": 1, "Ev Idx": 198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450765920.455, "dur": 1.233, + "args": { + "External id": 457416,"Sequence number": 5036567, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 199 + } + }, + { + "ph": "f", "id": 23, "pid": 2338707, "tid": 2379456, "ts": 6267450765920.455, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450765924.344, "dur": 29.766, + "args": { + "External id": 457417,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450765927.097, "dur": 26.520, + "args": { + "External id": 457418,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450765933.146, "dur": 0.893, + "args": { + "External id": 457419,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450765967.075, "dur": 3441.237, + "args": { + "External id": 457420,"Record function id": 0, "Sequence number": 5036566, "Fwd thread id": 1, "Ev Idx": 203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450765982.451, "dur": 3385.628, + "args": { + "External id": 457421,"Sequence number": 5036566, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 204 + } + }, + { + "ph": "f", "id": 24, "pid": 2338707, "tid": 2379456, "ts": 6267450765982.451, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450766019.597, "dur": 2.934, + "args": { + "External id": 457422,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450766025.230, "dur": 3087.133, + "args": { + "External id": 457423,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450766030.311, "dur": 3081.604, + "args": { + "External id": 457424,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450766035.545, "dur": 4.353, + "args": { + "External id": 457425,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450766041.191, "dur": 3068.865, + "args": { + "External id": 457426,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338707, "tid": 2379456, + "ts": 6267450769116.630, "dur": 0.486, + "args": { + "External id": 457427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769119.140, "dur": 2.836, + "args": { + "External id": 457428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769120.614, "dur": 1.212, + "args": { + "External id": 457429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2379456, + "ts": 6267450769127.892, "dur": 24.917, + "args": { + "External id": 457430,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338707, "tid": 2379456, + "ts": 6267450769159.006, "dur": 51.836, + "args": { + "External id": 457431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2379456, + "ts": 6267450769160.429, "dur": 50.165, + "args": { + "External id": 457432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2379456, + "ts": 6267450769162.254, "dur": 47.856, + "args": { + "External id": 457433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769382.567, "dur": 20.032, + "args": { + "External id": 457434,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450769426.395, "dur": 19.099, + "args": { + "External id": 457435,"Record function id": 0, "Ev Idx": 218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450769429.724, "dur": 13.515, + "args": { + "External id": 457436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450769433.777, "dur": 8.253, + "args": { + "External id": 457437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450769437.833, "dur": 4.019, + "args": { + "External id": 457438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450769450.155, "dur": 17.014, + "args": { + "External id": 457439,"Record function id": 0, "Sequence number": 5036565, "Fwd thread id": 1, "Ev Idx": 222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450769451.708, "dur": 12.587, + "args": { + "External id": 457440,"Sequence number": 5036565, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 223 + } + }, + { + "ph": "f", "id": 25, "pid": 2338707, "tid": 2379456, "ts": 6267450769451.708, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450769455.250, "dur": 8.813, + "args": { + "External id": 457441,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450769459.803, "dur": 4.044, + "args": { + "External id": 457442,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450769471.483, "dur": 93.516, + "args": { + "External id": 457443,"Record function id": 0, "Sequence number": 5036564, "Fwd thread id": 1, "Ev Idx": 226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450769472.680, "dur": 85.067, + "args": { + "External id": 457444,"Sequence number": 5036564, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 227 + } + }, + { + "ph": "f", "id": 26, "pid": 2338707, "tid": 2379456, "ts": 6267450769472.680, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450769475.532, "dur": 81.612, + "args": { + "External id": 457445,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450769479.291, "dur": 29.732, + "args": { + "External id": 457446,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450769481.076, "dur": 3.962, + "args": { + "External id": 457447,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769486.280, "dur": 22.409, + "args": { + "External id": 457448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769488.991, "dur": 19.143, + "args": { + "External id": 457449,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450769513.547, "dur": 7.669, + "args": { + "External id": 457450,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450769519.531, "dur": 1.363, + "args": { + "External id": 457451,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769522.628, "dur": 33.601, + "args": { + "External id": 457452,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450769570.038, "dur": 99.382, + "args": { + "External id": 457453,"Record function id": 0, "Sequence number": 5036563, "Fwd thread id": 1, "Ev Idx": 236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450769571.534, "dur": 92.433, + "args": { + "External id": 457454,"Sequence number": 5036563, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 237 + } + }, + { + "ph": "f", "id": 27, "pid": 2338707, "tid": 2379456, "ts": 6267450769571.534, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450769577.671, "dur": 85.870, + "args": { + "External id": 457455,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450769579.989, "dur": 46.450, + "args": { + "External id": 457456,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450769586.092, "dur": 3.230, + "args": { + "External id": 457457,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769590.334, "dur": 35.720, + "args": { + "External id": 457458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769607.195, "dur": 18.030, + "args": { + "External id": 457459,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267450769631.078, "dur": 6.401, + "args": { + "External id": 457460,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450769635.637, "dur": 1.231, + "args": { + "External id": 457461,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769638.866, "dur": 23.998, + "args": { + "External id": 457462,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450769675.795, "dur": 145.865, + "args": { + "External id": 457463,"Record function id": 0, "Sequence number": 5036562, "Fwd thread id": 1, "Ev Idx": 246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450769677.092, "dur": 139.280, + "args": { + "External id": 457464,"Sequence number": 5036562, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 247 + } + }, + { + "ph": "f", "id": 28, "pid": 2338707, "tid": 2379456, "ts": 6267450769677.092, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450769679.517, "dur": 136.211, + "args": { + "External id": 457465,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450769683.837, "dur": 18.505, + "args": { + "External id": 457466,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450769684.722, "dur": 2.972, + "args": { + "External id": 457467,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769688.568, "dur": 13.433, + "args": { + "External id": 457468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769689.400, "dur": 12.166, + "args": { + "External id": 457469,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450769706.526, "dur": 5.478, + "args": { + "External id": 457470,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450769708.066, "dur": 3.653, + "args": { + "External id": 457471,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769713.291, "dur": 101.020, + "args": { + "External id": 457472,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450769827.395, "dur": 146.871, + "args": { + "External id": 457473,"Record function id": 0, "Sequence number": 5036561, "Fwd thread id": 1, "Ev Idx": 256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450769829.008, "dur": 119.582, + "args": { + "External id": 457474,"Sequence number": 5036561, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 257 + } + }, + { + "ph": "f", "id": 29, "pid": 2338707, "tid": 2379456, "ts": 6267450769829.008, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450769831.024, "dur": 117.106, + "args": { + "External id": 457475,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267450769832.433, "dur": 36.186, + "args": { + "External id": 457476,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450769838.756, "dur": 2.885, + "args": { + "External id": 457477,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769847.196, "dur": 21.091, + "args": { + "External id": 457478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769851.393, "dur": 16.403, + "args": { + "External id": 457479,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450769872.949, "dur": 2.833, + "args": { + "External id": 457480,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450769874.580, "dur": 0.885, + "args": { + "External id": 457481,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769876.579, "dur": 70.413, + "args": { + "External id": 457482,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450769954.159, "dur": 17.236, + "args": { + "External id": 457483,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450769982.363, "dur": 486.720, + "args": { + "External id": 457484,"Record function id": 0, "Sequence number": 5036560, "Fwd thread id": 1, "Ev Idx": 267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450769984.116, "dur": 471.935, + "args": { + "External id": 457485,"Sequence number": 5036560, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 268 + } + }, + { + "ph": "f", "id": 30, "pid": 2338707, "tid": 2379456, "ts": 6267450769984.116, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267450770245.954, "dur": 58.886, + "args": { + "External id": 457486,"kernel_hash": "cze4nwoznzp5ccj3536wygl2i2ykkdyt3hdmkvx2y6tl7tevwkdm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "131072", "4096", "1", "993", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ze/cze4nwoznzp5ccj3536wygl2i2ykkdyt3hdmkvx2y6tl7tevwkdm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [4096], [131072, 4096], [131072, 4096], [132, 4096], [131072], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2338707, "tid": 2379456, + "ts": 6267450770350.695, "dur": 33.040, + "args": { + "External id": 457487,"kernel_hash": "ccswrdngzi6jo4g2p2v6rs3fdbvr7wkgz5izdhrj3lqu7uzzy4ad", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/cs/ccswrdngzi6jo4g2p2v6rs3fdbvr7wkgz5izdhrj3lqu7uzzy4ad.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2338707, "tid": 2379456, + "ts": 6267450770404.832, "dur": 21.923, + "args": { + "External id": 457488,"kernel_hash": "cddm6binnqpaq4idvznth7jqun5cvp5pc6dr7dh3ezznaxoowmjo", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/dd/cddm6binnqpaq4idvznth7jqun5cvp5pc6dr7dh3ezznaxoowmjo.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450770483.019, "dur": 16.798, + "args": { + "External id": 457489,"Record function id": 0, "Ev Idx": 272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450770485.995, "dur": 12.415, + "args": { + "External id": 457490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450770489.662, "dur": 7.548, + "args": { + "External id": 457491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450770493.264, "dur": 3.773, + "args": { + "External id": 457492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450770504.734, "dur": 47.808, + "args": { + "External id": 457493,"Record function id": 0, "Sequence number": 5036559, "Fwd thread id": 1, "Ev Idx": 276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450770506.064, "dur": 34.438, + "args": { + "External id": 457494,"Sequence number": 5036559, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 277 + } + }, + { + "ph": "f", "id": 31, "pid": 2338707, "tid": 2379456, "ts": 6267450770506.064, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267450770508.992, "dur": 11.567, + "args": { + "External id": 457495,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450770516.443, "dur": 1.842, + "args": { + "External id": 457496,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267450770521.474, "dur": 8.962, + "args": { + "External id": 457497,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450770528.617, "dur": 0.802, + "args": { + "External id": 457498,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267450770530.939, "dur": 2.866, + "args": { + "External id": 457499,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450770532.081, "dur": 0.931, + "args": { + "External id": 457500,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267450770534.417, "dur": 5.332, + "args": { + "External id": 457501,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450770538.487, "dur": 0.650, + "args": { + "External id": 457502,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450770557.332, "dur": 7.329, + "args": { + "External id": 457503,"Record function id": 0, "Sequence number": 5036558, "Fwd thread id": 1, "Ev Idx": 286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450770558.745, "dur": 1.499, + "args": { + "External id": 457504,"Sequence number": 5036558, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 287 + } + }, + { + "ph": "f", "id": 32, "pid": 2338707, "tid": 2379456, "ts": 6267450770558.745, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450770569.948, "dur": 759.572, + "args": { + "External id": 457505,"Record function id": 0, "Sequence number": 5036557, "Fwd thread id": 1, "Ev Idx": 288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450770571.327, "dur": 737.572, + "args": { + "External id": 457506,"Sequence number": 5036557, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 289 + } + }, + { + "ph": "f", "id": 33, "pid": 2338707, "tid": 2379456, "ts": 6267450770571.327, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450770646.303, "dur": 16.367, + "args": { + "External id": 457507,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450770656.102, "dur": 6.036, + "args": { + "External id": 457508,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450770670.549, "dur": 9.027, + "args": { + "External id": 457509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450770674.944, "dur": 3.495, + "args": { + "External id": 457510,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450770677.036, "dur": 1.117, + "args": { + "External id": 457511,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2379456, + "ts": 6267450770688.661, "dur": 164.915, + "args": { + "External id": 457512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450770690.019, "dur": 8.950, + "args": { + "External id": 457513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450770690.884, "dur": 7.157, + "args": { + "External id": 457514,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450770694.996, "dur": 2.892, + "args": { + "External id": 457515,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2379456, + "ts": 6267450770701.477, "dur": 151.037, + "args": { + "External id": 457516,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450770705.271, "dur": 146.007, + "args": { + "External id": 457517,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267450770860.804, "dur": 8.094, + "args": { + "External id": 457518,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450770865.467, "dur": 3.271, + "args": { + "External id": 457519,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450770918.252, "dur": 8.846, + "args": { + "External id": 457520,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450770928.320, "dur": 2.163, + "args": { + "External id": 457521,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450770934.254, "dur": 2.416, + "args": { + "External id": 457522,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450770982.309, "dur": 2.642, + "args": { + "External id": 457523,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450770983.220, "dur": 1.563, + "args": { + "External id": 457524,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338707, "tid": 2379456, + "ts": 6267450771027.233, "dur": 248.830, + "args": { + "External id": 457525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267450771034.926, "dur": 13.168, + "args": { + "External id": 457526,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771043.018, "dur": 1.094, + "args": { + "External id": 457527,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450771051.004, "dur": 55.050, + "args": { + "External id": 457528,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771100.877, "dur": 3.540, + "args": { + "External id": 457529,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267450771108.518, "dur": 4.522, + "args": { + "External id": 457530,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771112.094, "dur": 0.518, + "args": { + "External id": 457531,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450771113.764, "dur": 5.310, + "args": { + "External id": 457532,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771117.688, "dur": 0.574, + "args": { + "External id": 457533,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450771124.595, "dur": 4.466, + "args": { + "External id": 457534,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771128.032, "dur": 0.627, + "args": { + "External id": 457535,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450771132.747, "dur": 10.083, + "args": { + "External id": 457536,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450771139.919, "dur": 2.697, + "args": { + "External id": 457537,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450771143.722, "dur": 3.970, + "args": { + "External id": 457538,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771146.879, "dur": 0.480, + "args": { + "External id": 457539,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450771148.520, "dur": 2.375, + "args": { + "External id": 457540,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450771149.117, "dur": 1.674, + "args": { + "External id": 457541,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267450771153.239, "dur": 101.533, + "args": { + "External id": 457542,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450771256.937, "dur": 3.735, + "args": { + "External id": 457543,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450771263.916, "dur": 6.824, + "args": { + "External id": 457544,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771269.366, "dur": 0.722, + "args": { + "External id": 457545,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450771273.379, "dur": 1.203, + "args": { + "External id": 457546,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450771346.801, "dur": 12.560, + "args": { + "External id": 457547,"Record function id": 0, "Ev Idx": 330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450771349.428, "dur": 9.032, + "args": { + "External id": 457548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450771352.569, "dur": 4.784, + "args": { + "External id": 457549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450771354.004, "dur": 3.203, + "args": { + "External id": 457550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771364.711, "dur": 14.185, + "args": { + "External id": 457551,"Record function id": 0, "Sequence number": 5036556, "Fwd thread id": 1, "Ev Idx": 334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771366.175, "dur": 7.794, + "args": { + "External id": 457552,"Sequence number": 5036556, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 335 + } + }, + { + "ph": "f", "id": 34, "pid": 2338707, "tid": 2379456, "ts": 6267450771366.175, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450771371.582, "dur": 2.119, + "args": { + "External id": 457553,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450771372.428, "dur": 1.129, + "args": { + "External id": 457554,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771383.710, "dur": 162.902, + "args": { + "External id": 457555,"Record function id": 0, "Sequence number": 5036555, "Fwd thread id": 1, "Ev Idx": 338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771384.924, "dur": 153.381, + "args": { + "External id": 457556,"Sequence number": 5036555, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 339 + } + }, + { + "ph": "f", "id": 35, "pid": 2338707, "tid": 2379456, "ts": 6267450771384.924, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450771393.525, "dur": 5.125, + "args": { + "External id": 457557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450771395.493, "dur": 2.328, + "args": { + "External id": 457558,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771396.997, "dur": 0.612, + "args": { + "External id": 457559,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450771400.540, "dur": 66.453, + "args": { + "External id": 457560,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450771468.737, "dur": 7.510, + "args": { + "External id": 457561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450771469.556, "dur": 5.960, + "args": { + "External id": 457562,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771474.284, "dur": 1.042, + "args": { + "External id": 457563,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450771478.215, "dur": 6.004, + "args": { + "External id": 457564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450771479.486, "dur": 4.015, + "args": { + "External id": 457565,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771482.913, "dur": 0.509, + "args": { + "External id": 457566,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450771484.746, "dur": 52.360, + "args": { + "External id": 457567,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771552.513, "dur": 7.990, + "args": { + "External id": 457568,"Record function id": 0, "Sequence number": 5036554, "Fwd thread id": 1, "Ev Idx": 351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771553.586, "dur": 4.654, + "args": { + "External id": 457569,"Sequence number": 5036554, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 352 + } + }, + { + "ph": "f", "id": 36, "pid": 2338707, "tid": 2379456, "ts": 6267450771553.586, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450771555.613, "dur": 2.451, + "args": { + "External id": 457570,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450771556.566, "dur": 1.330, + "args": { + "External id": 457571,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771565.226, "dur": 15.442, + "args": { + "External id": 457572,"Record function id": 0, "Sequence number": 5036553, "Fwd thread id": 1, "Ev Idx": 355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771569.294, "dur": 7.666, + "args": { + "External id": 457573,"Sequence number": 5036553, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 356 + } + }, + { + "ph": "f", "id": 37, "pid": 2338707, "tid": 2379456, "ts": 6267450771569.294, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450771570.769, "dur": 5.917, + "args": { + "External id": 457574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450771571.473, "dur": 4.613, + "args": { + "External id": 457575,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771575.408, "dur": 0.514, + "args": { + "External id": 457576,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450771585.536, "dur": 23.299, + "args": { + "External id": 457577,"Record function id": 0, "Ev Idx": 360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450771586.957, "dur": 4.297, + "args": { + "External id": 457578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450771588.498, "dur": 2.048, + "args": { + "External id": 457579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450771589.145, "dur": 1.307, + "args": { + "External id": 457580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771618.767, "dur": 12.498, + "args": { + "External id": 457581,"Record function id": 0, "Sequence number": 5036552, "Fwd thread id": 1, "Ev Idx": 364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771620.335, "dur": 7.812, + "args": { + "External id": 457582,"Sequence number": 5036552, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 365 + } + }, + { + "ph": "f", "id": 38, "pid": 2338707, "tid": 2379456, "ts": 6267450771620.335, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450771622.489, "dur": 5.449, + "args": { + "External id": 457583,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450771626.291, "dur": 1.522, + "args": { + "External id": 457584,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771635.107, "dur": 135.293, + "args": { + "External id": 457585,"Record function id": 0, "Sequence number": 5036551, "Fwd thread id": 1, "Ev Idx": 368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771636.294, "dur": 120.953, + "args": { + "External id": 457586,"Sequence number": 5036551, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 369 + } + }, + { + "ph": "f", "id": 39, "pid": 2338707, "tid": 2379456, "ts": 6267450771636.294, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450771640.593, "dur": 3.359, + "args": { + "External id": 457587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450771641.405, "dur": 2.002, + "args": { + "External id": 457588,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771642.583, "dur": 0.664, + "args": { + "External id": 457589,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450771647.367, "dur": 48.783, + "args": { + "External id": 457590,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450771697.530, "dur": 6.541, + "args": { + "External id": 457591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450771698.628, "dur": 4.805, + "args": { + "External id": 457592,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771702.320, "dur": 0.968, + "args": { + "External id": 457593,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450771705.819, "dur": 7.854, + "args": { + "External id": 457594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450771706.947, "dur": 5.901, + "args": { + "External id": 457595,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771709.932, "dur": 2.795, + "args": { + "External id": 457596,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450771714.180, "dur": 42.122, + "args": { + "External id": 457597,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771775.825, "dur": 45.184, + "args": { + "External id": 457598,"Record function id": 0, "Sequence number": 5036550, "Fwd thread id": 1, "Ev Idx": 381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771777.087, "dur": 6.730, + "args": { + "External id": 457599,"Sequence number": 5036550, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 382 + } + }, + { + "ph": "f", "id": 40, "pid": 2338707, "tid": 2379456, "ts": 6267450771777.087, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450771779.173, "dur": 4.479, + "args": { + "External id": 457600,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450771782.291, "dur": 1.184, + "args": { + "External id": 457601,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267450771788.635, "dur": 28.452, + "args": { + "External id": 457602,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771825.832, "dur": 12.341, + "args": { + "External id": 457603,"Record function id": 0, "Sequence number": 5036549, "Fwd thread id": 1, "Ev Idx": 386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450771826.928, "dur": 8.293, + "args": { + "External id": 457604,"Sequence number": 5036549, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 387 + } + }, + { + "ph": "f", "id": 41, "pid": 2338707, "tid": 2379456, "ts": 6267450771826.928, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450771830.908, "dur": 4.060, + "args": { + "External id": 457605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450771832.087, "dur": 2.293, + "args": { + "External id": 457606,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450771833.600, "dur": 0.622, + "args": { + "External id": 457607,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450771843.408, "dur": 7.289, + "args": { + "External id": 457608,"Record function id": 0, "Ev Idx": 391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450771845.334, "dur": 4.666, + "args": { + "External id": 457609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450771846.947, "dur": 2.540, + "args": { + "External id": 457610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450771847.753, "dur": 1.642, + "args": { + "External id": 457611,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450771855.943, "dur": 607.657, + "args": { + "External id": 457612,"Record function id": 0, "Sequence number": 5036548, "Fwd thread id": 1, "Ev Idx": 395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450771857.394, "dur": 584.618, + "args": { + "External id": 457613,"Sequence number": 5036548, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 396 + } + }, + { + "ph": "f", "id": 42, "pid": 2338707, "tid": 2379456, "ts": 6267450771857.394, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2379456, + "ts": 6267450771893.603, "dur": 44.850, + "args": { + "External id": 457614,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267450771896.217, "dur": 41.980, + "args": { + "External id": 457615,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450771900.767, "dur": 7.332, + "args": { + "External id": 457616,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450771903.849, "dur": 3.479, + "args": { + "External id": 457617,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450771909.978, "dur": 27.747, + "args": { + "External id": 457618,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450771959.479, "dur": 2.566, + "args": { + "External id": 457619,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450771960.345, "dur": 1.516, + "args": { + "External id": 457620,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450771966.602, "dur": 1.909, + "args": { + "External id": 457621,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450771967.397, "dur": 1.007, + "args": { + "External id": 457622,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450771990.977, "dur": 4.863, + "args": { + "External id": 457623,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450772010.258, "dur": 3.220, + "args": { + "External id": 457624,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450772292.285, "dur": 4.083, + "args": { + "External id": 457625,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450772302.775, "dur": 47.221, + "args": { + "External id": 457626,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450772318.302, "dur": 1.298, + "args": { + "External id": 457627,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450772358.132, "dur": 42.237, + "args": { + "External id": 457628,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450772360.240, "dur": 39.877, + "args": { + "External id": 457629,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450772365.780, "dur": 7.877, + "args": { + "External id": 457630,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450772378.390, "dur": 21.052, + "args": { + "External id": 457631,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267450772405.839, "dur": 3.216, + "args": { + "External id": 457632,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450772407.313, "dur": 1.562, + "args": { + "External id": 457633,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450772418.349, "dur": 5.750, + "args": { + "External id": 457634,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450772422.183, "dur": 1.706, + "args": { + "External id": 457635,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450772426.504, "dur": 1.858, + "args": { + "External id": 457636,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450772427.367, "dur": 0.885, + "args": { + "External id": 457637,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450772480.485, "dur": 11.184, + "args": { + "External id": 457638,"Record function id": 0, "Ev Idx": 421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450772483.207, "dur": 7.536, + "args": { + "External id": 457639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450772485.996, "dur": 3.614, + "args": { + "External id": 457640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450772487.150, "dur": 2.282, + "args": { + "External id": 457641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450772496.084, "dur": 13.133, + "args": { + "External id": 457642,"Record function id": 0, "Sequence number": 5036547, "Fwd thread id": 1, "Ev Idx": 425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450772497.234, "dur": 7.351, + "args": { + "External id": 457643,"Sequence number": 5036547, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 426 + } + }, + { + "ph": "f", "id": 43, "pid": 2338707, "tid": 2379456, "ts": 6267450772497.234, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450772501.863, "dur": 2.424, + "args": { + "External id": 457644,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450772502.607, "dur": 1.506, + "args": { + "External id": 457645,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450772516.468, "dur": 213.338, + "args": { + "External id": 457646,"Record function id": 0, "Sequence number": 5036546, "Fwd thread id": 1, "Ev Idx": 429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450772517.721, "dur": 204.722, + "args": { + "External id": 457647,"Sequence number": 5036546, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 430 + } + }, + { + "ph": "f", "id": 44, "pid": 2338707, "tid": 2379456, "ts": 6267450772517.721, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450772522.177, "dur": 5.789, + "args": { + "External id": 457648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450772524.001, "dur": 3.310, + "args": { + "External id": 457649,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450772525.904, "dur": 1.138, + "args": { + "External id": 457650,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450772529.626, "dur": 111.051, + "args": { + "External id": 457651,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450772643.852, "dur": 13.318, + "args": { + "External id": 457652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450772645.240, "dur": 10.895, + "args": { + "External id": 457653,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450772650.248, "dur": 5.628, + "args": { + "External id": 457654,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450772659.428, "dur": 6.812, + "args": { + "External id": 457655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450772660.989, "dur": 4.525, + "args": { + "External id": 457656,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450772664.803, "dur": 0.584, + "args": { + "External id": 457657,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450772667.322, "dur": 53.971, + "args": { + "External id": 457658,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450772738.222, "dur": 11.864, + "args": { + "External id": 457659,"Record function id": 0, "Sequence number": 5036545, "Fwd thread id": 1, "Ev Idx": 442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450772739.857, "dur": 7.241, + "args": { + "External id": 457660,"Sequence number": 5036545, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 443 + } + }, + { + "ph": "f", "id": 45, "pid": 2338707, "tid": 2379456, "ts": 6267450772739.857, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450772741.527, "dur": 5.393, + "args": { + "External id": 457661,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450772745.080, "dur": 1.669, + "args": { + "External id": 457662,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450772754.251, "dur": 11.450, + "args": { + "External id": 457663,"Record function id": 0, "Sequence number": 5036544, "Fwd thread id": 1, "Ev Idx": 446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450772755.719, "dur": 7.302, + "args": { + "External id": 457664,"Sequence number": 5036544, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 447 + } + }, + { + "ph": "f", "id": 46, "pid": 2338707, "tid": 2379456, "ts": 6267450772755.719, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450772757.179, "dur": 5.601, + "args": { + "External id": 457665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450772757.890, "dur": 4.294, + "args": { + "External id": 457666,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450772761.273, "dur": 0.756, + "args": { + "External id": 457667,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450772770.899, "dur": 7.796, + "args": { + "External id": 457668,"Record function id": 0, "Ev Idx": 451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450772772.325, "dur": 5.762, + "args": { + "External id": 457669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450772774.380, "dur": 3.340, + "args": { + "External id": 457670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450772775.480, "dur": 2.118, + "args": { + "External id": 457671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450772782.674, "dur": 10.662, + "args": { + "External id": 457672,"Record function id": 0, "Sequence number": 5036543, "Fwd thread id": 1, "Ev Idx": 455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450772786.447, "dur": 4.112, + "args": { + "External id": 457673,"Sequence number": 5036543, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 456 + } + }, + { + "ph": "f", "id": 47, "pid": 2338707, "tid": 2379456, "ts": 6267450772786.447, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450772787.991, "dur": 2.394, + "args": { + "External id": 457674,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450772788.800, "dur": 1.415, + "args": { + "External id": 457675,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450772802.348, "dur": 547.031, + "args": { + "External id": 457676,"Record function id": 0, "Sequence number": 5036542, "Fwd thread id": 1, "Ev Idx": 459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450772803.670, "dur": 516.131, + "args": { + "External id": 457677,"Sequence number": 5036542, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 460 + } + }, + { + "ph": "f", "id": 48, "pid": 2338707, "tid": 2379456, "ts": 6267450772803.670, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450772830.417, "dur": 13.933, + "args": { + "External id": 457678,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450772838.463, "dur": 5.250, + "args": { + "External id": 457679,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450772847.087, "dur": 6.833, + "args": { + "External id": 457680,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450772847.935, "dur": 5.766, + "args": { + "External id": 457681,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450772856.070, "dur": 6.181, + "args": { + "External id": 457682,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450772859.372, "dur": 2.581, + "args": { + "External id": 457683,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450772902.071, "dur": 380.374, + "args": { + "External id": 457684,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450773021.324, "dur": 3.773, + "args": { + "External id": 457685,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450773029.685, "dur": 5.399, + "args": { + "External id": 457686,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450773038.583, "dur": 2.433, + "args": { + "External id": 457687,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450773042.131, "dur": 2.364, + "args": { + "External id": 457688,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450773146.994, "dur": 3.915, + "args": { + "External id": 457689,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450773148.070, "dur": 2.490, + "args": { + "External id": 457690,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450773153.027, "dur": 36.000, + "args": { + "External id": 457691,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450773159.727, "dur": 2.890, + "args": { + "External id": 457692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450773190.553, "dur": 1.771, + "args": { + "External id": 457693,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450773191.459, "dur": 0.760, + "args": { + "External id": 457694,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450773198.444, "dur": 22.099, + "args": { + "External id": 457695,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450773203.432, "dur": 3.349, + "args": { + "External id": 457696,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450773300.057, "dur": 5.254, + "args": { + "External id": 457697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450773309.052, "dur": 1.185, + "args": { + "External id": 457698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450773315.771, "dur": 0.734, + "args": { + "External id": 457699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450773363.433, "dur": 330.891, + "args": { + "External id": 457700,"Record function id": 0, "Sequence number": 5036541, "Fwd thread id": 1, "Ev Idx": 483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450773365.547, "dur": 316.849, + "args": { + "External id": 457701,"Sequence number": 5036541, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 484 + } + }, + { + "ph": "f", "id": 49, "pid": 2338707, "tid": 2379456, "ts": 6267450773365.547, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267450773395.251, "dur": 58.441, + "args": { + "External id": 457702,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450773398.860, "dur": 5.625, + "args": { + "External id": 457703,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450773406.627, "dur": 46.348, + "args": { + "External id": 457704,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450773467.315, "dur": 8.148, + "args": { + "External id": 457705,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450773472.373, "dur": 2.723, + "args": { + "External id": 457706,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450773706.926, "dur": 223.401, + "args": { + "External id": 457707,"Record function id": 0, "Sequence number": 5036540, "Fwd thread id": 1, "Ev Idx": 490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450773709.171, "dur": 212.449, + "args": { + "External id": 457708,"Sequence number": 5036540, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 491 + } + }, + { + "ph": "f", "id": 50, "pid": 2338707, "tid": 2379456, "ts": 6267450773709.171, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267450773726.320, "dur": 58.200, + "args": { + "External id": 457709,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450773729.630, "dur": 4.532, + "args": { + "External id": 457710,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450773735.386, "dur": 48.467, + "args": { + "External id": 457711,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450773794.164, "dur": 9.980, + "args": { + "External id": 457712,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450773799.785, "dur": 3.964, + "args": { + "External id": 457713,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450773938.446, "dur": 20.114, + "args": { + "External id": 457714,"Record function id": 0, "Sequence number": 5036539, "Fwd thread id": 1, "Ev Idx": 497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450773943.029, "dur": 11.933, + "args": { + "External id": 457715,"Sequence number": 5036539, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 498 + } + }, + { + "ph": "f", "id": 51, "pid": 2338707, "tid": 2379456, "ts": 6267450773943.029, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450773945.612, "dur": 9.052, + "args": { + "External id": 457716,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450773947.113, "dur": 7.283, + "args": { + "External id": 457717,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450773962.914, "dur": 11.624, + "args": { + "External id": 457718,"Record function id": 0, "Sequence number": 5036538, "Fwd thread id": 1, "Ev Idx": 501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450773964.147, "dur": 6.755, + "args": { + "External id": 457719,"Sequence number": 5036538, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 502 + } + }, + { + "ph": "f", "id": 52, "pid": 2338707, "tid": 2379456, "ts": 6267450773964.147, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450773965.327, "dur": 5.380, + "args": { + "External id": 457720,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450773969.367, "dur": 1.218, + "args": { + "External id": 457721,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450773978.140, "dur": 10.507, + "args": { + "External id": 457722,"Record function id": 0, "Sequence number": 5036537, "Fwd thread id": 1, "Ev Idx": 505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450773979.182, "dur": 6.518, + "args": { + "External id": 457723,"Sequence number": 5036537, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 506 + } + }, + { + "ph": "f", "id": 53, "pid": 2338707, "tid": 2379456, "ts": 6267450773979.182, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450773981.130, "dur": 4.389, + "args": { + "External id": 457724,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450773984.312, "dur": 1.092, + "args": { + "External id": 457725,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450773992.977, "dur": 38.779, + "args": { + "External id": 457726,"Record function id": 0, "Sequence number": 5036536, "Fwd thread id": 1, "Ev Idx": 509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450773994.270, "dur": 34.981, + "args": { + "External id": 457727,"Sequence number": 5036536, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 510 + } + }, + { + "ph": "f", "id": 54, "pid": 2338707, "tid": 2379456, "ts": 6267450773994.270, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450773997.275, "dur": 31.778, + "args": { + "External id": 457728,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450774027.733, "dur": 1.212, + "args": { + "External id": 457729,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774035.996, "dur": 250.318, + "args": { + "External id": 457730,"Record function id": 0, "Sequence number": 5036535, "Fwd thread id": 1, "Ev Idx": 513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774037.000, "dur": 236.139, + "args": { + "External id": 457731,"Sequence number": 5036535, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 514 + } + }, + { + "ph": "f", "id": 55, "pid": 2338707, "tid": 2379456, "ts": 6267450774037.000, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450774044.430, "dur": 7.261, + "args": { + "External id": 457732,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450774046.588, "dur": 4.253, + "args": { + "External id": 457733,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450774048.800, "dur": 1.713, + "args": { + "External id": 457734,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450774094.400, "dur": 95.539, + "args": { + "External id": 457735,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450774192.561, "dur": 11.093, + "args": { + "External id": 457736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450774193.554, "dur": 8.888, + "args": { + "External id": 457737,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450774198.689, "dur": 3.553, + "args": { + "External id": 457738,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450774206.271, "dur": 6.739, + "args": { + "External id": 457739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450774207.475, "dur": 4.767, + "args": { + "External id": 457740,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450774211.367, "dur": 0.749, + "args": { + "External id": 457741,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450774213.958, "dur": 57.972, + "args": { + "External id": 457742,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774295.933, "dur": 10.810, + "args": { + "External id": 457743,"Record function id": 0, "Sequence number": 5036534, "Fwd thread id": 1, "Ev Idx": 526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774297.345, "dur": 6.821, + "args": { + "External id": 457744,"Sequence number": 5036534, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 527 + } + }, + { + "ph": "f", "id": 56, "pid": 2338707, "tid": 2379456, "ts": 6267450774297.345, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450774298.845, "dur": 5.103, + "args": { + "External id": 457745,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450774302.460, "dur": 1.388, + "args": { + "External id": 457746,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774310.899, "dur": 12.096, + "args": { + "External id": 457747,"Record function id": 0, "Sequence number": 5036533, "Fwd thread id": 1, "Ev Idx": 530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774312.408, "dur": 7.586, + "args": { + "External id": 457748,"Sequence number": 5036533, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 531 + } + }, + { + "ph": "f", "id": 57, "pid": 2338707, "tid": 2379456, "ts": 6267450774312.408, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450774313.613, "dur": 6.085, + "args": { + "External id": 457749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450774314.168, "dur": 4.844, + "args": { + "External id": 457750,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450774318.203, "dur": 0.706, + "args": { + "External id": 457751,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450774330.163, "dur": 13.673, + "args": { + "External id": 457752,"Record function id": 0, "Ev Idx": 535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450774331.801, "dur": 11.081, + "args": { + "External id": 457753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450774334.976, "dur": 7.433, + "args": { + "External id": 457754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450774338.948, "dur": 3.306, + "args": { + "External id": 457755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774350.620, "dur": 7.081, + "args": { + "External id": 457756,"Record function id": 0, "Sequence number": 5036532, "Fwd thread id": 1, "Ev Idx": 539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774351.601, "dur": 3.050, + "args": { + "External id": 457757,"Sequence number": 5036532, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 540 + } + }, + { + "ph": "f", "id": 58, "pid": 2338707, "tid": 2379456, "ts": 6267450774351.601, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450774352.881, "dur": 1.598, + "args": { + "External id": 457758,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450774353.501, "dur": 0.848, + "args": { + "External id": 457759,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774361.951, "dur": 117.685, + "args": { + "External id": 457760,"Record function id": 0, "Sequence number": 5036531, "Fwd thread id": 1, "Ev Idx": 543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774362.851, "dur": 106.700, + "args": { + "External id": 457761,"Sequence number": 5036531, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 544 + } + }, + { + "ph": "f", "id": 59, "pid": 2338707, "tid": 2379456, "ts": 6267450774362.851, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450774366.241, "dur": 5.578, + "args": { + "External id": 457762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450774367.008, "dur": 4.315, + "args": { + "External id": 457763,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450774370.667, "dur": 0.521, + "args": { + "External id": 457764,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450774372.834, "dur": 33.450, + "args": { + "External id": 457765,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450774407.714, "dur": 6.646, + "args": { + "External id": 457766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450774408.262, "dur": 5.444, + "args": { + "External id": 457767,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450774412.524, "dur": 1.030, + "args": { + "External id": 457768,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450774415.835, "dur": 9.219, + "args": { + "External id": 457769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450774420.088, "dur": 4.437, + "args": { + "External id": 457770,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450774421.041, "dur": 3.396, + "args": { + "External id": 457771,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450774425.537, "dur": 42.996, + "args": { + "External id": 457772,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774485.345, "dur": 48.449, + "args": { + "External id": 457773,"Record function id": 0, "Sequence number": 5036530, "Fwd thread id": 1, "Ev Idx": 556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774486.403, "dur": 6.911, + "args": { + "External id": 457774,"Sequence number": 5036530, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 557 + } + }, + { + "ph": "f", "id": 60, "pid": 2338707, "tid": 2379456, "ts": 6267450774486.403, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450774488.183, "dur": 4.958, + "args": { + "External id": 457775,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450774491.474, "dur": 1.499, + "args": { + "External id": 457776,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267450774497.088, "dur": 33.692, + "args": { + "External id": 457777,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774538.801, "dur": 13.722, + "args": { + "External id": 457778,"Record function id": 0, "Sequence number": 5036529, "Fwd thread id": 1, "Ev Idx": 561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774542.622, "dur": 5.512, + "args": { + "External id": 457779,"Sequence number": 5036529, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 562 + } + }, + { + "ph": "f", "id": 61, "pid": 2338707, "tid": 2379456, "ts": 6267450774542.622, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450774543.666, "dur": 4.230, + "args": { + "External id": 457780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450774545.043, "dur": 2.121, + "args": { + "External id": 457781,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450774546.226, "dur": 0.802, + "args": { + "External id": 457782,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450774557.328, "dur": 6.123, + "args": { + "External id": 457783,"Record function id": 0, "Ev Idx": 566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450774558.790, "dur": 3.976, + "args": { + "External id": 457784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450774560.041, "dur": 2.420, + "args": { + "External id": 457785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450774560.724, "dur": 1.612, + "args": { + "External id": 457786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774567.881, "dur": 12.406, + "args": { + "External id": 457787,"Record function id": 0, "Sequence number": 5036528, "Fwd thread id": 1, "Ev Idx": 570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774569.068, "dur": 8.854, + "args": { + "External id": 457788,"Sequence number": 5036528, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 571 + } + }, + { + "ph": "f", "id": 62, "pid": 2338707, "tid": 2379456, "ts": 6267450774569.068, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450774573.251, "dur": 4.465, + "args": { + "External id": 457789,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450774576.427, "dur": 1.113, + "args": { + "External id": 457790,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774584.533, "dur": 157.819, + "args": { + "External id": 457791,"Record function id": 0, "Sequence number": 5036527, "Fwd thread id": 1, "Ev Idx": 574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774585.528, "dur": 144.641, + "args": { + "External id": 457792,"Sequence number": 5036527, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 575 + } + }, + { + "ph": "f", "id": 63, "pid": 2338707, "tid": 2379456, "ts": 6267450774585.528, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450774588.915, "dur": 2.521, + "args": { + "External id": 457793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450774589.464, "dur": 1.464, + "args": { + "External id": 457794,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450774590.357, "dur": 0.427, + "args": { + "External id": 457795,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450774617.508, "dur": 53.772, + "args": { + "External id": 457796,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450774672.995, "dur": 6.123, + "args": { + "External id": 457797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450774673.641, "dur": 4.700, + "args": { + "External id": 457798,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450774675.170, "dur": 3.001, + "args": { + "External id": 457799,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450774681.284, "dur": 6.505, + "args": { + "External id": 457800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450774682.571, "dur": 4.591, + "args": { + "External id": 457801,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450774686.597, "dur": 0.444, + "args": { + "External id": 457802,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450774690.995, "dur": 38.096, + "args": { + "External id": 457803,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774749.797, "dur": 31.720, + "args": { + "External id": 457804,"Record function id": 0, "Sequence number": 5036526, "Fwd thread id": 1, "Ev Idx": 587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774751.145, "dur": 4.365, + "args": { + "External id": 457805,"Sequence number": 5036526, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 588 + } + }, + { + "ph": "f", "id": 64, "pid": 2338707, "tid": 2379456, "ts": 6267450774751.145, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450774752.834, "dur": 2.477, + "args": { + "External id": 457806,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450774753.675, "dur": 1.466, + "args": { + "External id": 457807,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450774758.682, "dur": 20.146, + "args": { + "External id": 457808,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774785.995, "dur": 13.781, + "args": { + "External id": 457809,"Record function id": 0, "Sequence number": 5036525, "Fwd thread id": 1, "Ev Idx": 592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450774787.162, "dur": 10.200, + "args": { + "External id": 457810,"Sequence number": 5036525, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 593 + } + }, + { + "ph": "f", "id": 65, "pid": 2338707, "tid": 2379456, "ts": 6267450774787.162, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450774788.016, "dur": 9.107, + "args": { + "External id": 457811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450774788.947, "dur": 7.517, + "args": { + "External id": 457812,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450774795.467, "dur": 0.858, + "args": { + "External id": 457813,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450774804.569, "dur": 7.122, + "args": { + "External id": 457814,"Record function id": 0, "Ev Idx": 597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450774806.297, "dur": 4.800, + "args": { + "External id": 457815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450774807.992, "dur": 2.484, + "args": { + "External id": 457816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450774808.771, "dur": 1.555, + "args": { + "External id": 457817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450774817.402, "dur": 514.144, + "args": { + "External id": 457818,"Record function id": 0, "Sequence number": 5036524, "Fwd thread id": 1, "Ev Idx": 601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450774818.791, "dur": 468.698, + "args": { + "External id": 457819,"Sequence number": 5036524, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 602 + } + }, + { + "ph": "f", "id": 66, "pid": 2338707, "tid": 2379456, "ts": 6267450774818.791, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450774863.771, "dur": 2.317, + "args": { + "External id": 457820,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450774864.309, "dur": 1.471, + "args": { + "External id": 457821,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450774888.197, "dur": 7.595, + "args": { + "External id": 457822,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450774907.269, "dur": 2.172, + "args": { + "External id": 457823,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450775160.456, "dur": 3.501, + "args": { + "External id": 457824,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450775169.083, "dur": 42.742, + "args": { + "External id": 457825,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450775181.651, "dur": 1.173, + "args": { + "External id": 457826,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450775219.087, "dur": 37.886, + "args": { + "External id": 457827,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450775221.190, "dur": 35.530, + "args": { + "External id": 457828,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450775228.860, "dur": 4.931, + "args": { + "External id": 457829,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450775235.872, "dur": 20.210, + "args": { + "External id": 457830,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267450775265.974, "dur": 2.803, + "args": { + "External id": 457831,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450775267.258, "dur": 1.396, + "args": { + "External id": 457832,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450775276.946, "dur": 2.064, + "args": { + "External id": 457833,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450775277.643, "dur": 1.231, + "args": { + "External id": 457834,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267450775303.083, "dur": 19.521, + "args": { + "External id": 457835,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450775347.221, "dur": 13.678, + "args": { + "External id": 457836,"Record function id": 0, "Ev Idx": 619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450775352.684, "dur": 7.225, + "args": { + "External id": 457837,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450775355.144, "dur": 3.643, + "args": { + "External id": 457838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450775356.338, "dur": 2.320, + "args": { + "External id": 457839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450775368.068, "dur": 8.603, + "args": { + "External id": 457840,"Record function id": 0, "Sequence number": 5036523, "Fwd thread id": 1, "Ev Idx": 623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450775369.577, "dur": 1.964, + "args": { + "External id": 457841,"Sequence number": 5036523, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 624 + } + }, + { + "ph": "f", "id": 67, "pid": 2338707, "tid": 2379456, "ts": 6267450775369.577, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450775381.314, "dur": 535.778, + "args": { + "External id": 457842,"Record function id": 0, "Sequence number": 5036522, "Fwd thread id": 1, "Ev Idx": 625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450775382.948, "dur": 520.576, + "args": { + "External id": 457843,"Sequence number": 5036522, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 626 + } + }, + { + "ph": "f", "id": 68, "pid": 2338707, "tid": 2379456, "ts": 6267450775382.948, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450775424.674, "dur": 9.492, + "args": { + "External id": 457844,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450775429.954, "dur": 3.827, + "args": { + "External id": 457845,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450775438.194, "dur": 10.991, + "args": { + "External id": 457846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450775439.961, "dur": 8.383, + "args": { + "External id": 457847,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450775444.566, "dur": 3.571, + "args": { + "External id": 457848,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2379456, + "ts": 6267450775453.147, "dur": 104.950, + "args": { + "External id": 457849,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450775456.911, "dur": 2.468, + "args": { + "External id": 457850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450775457.614, "dur": 1.248, + "args": { + "External id": 457851,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450775458.377, "dur": 0.387, + "args": { + "External id": 457852,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2379456, + "ts": 6267450775460.757, "dur": 96.626, + "args": { + "External id": 457853,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450775462.696, "dur": 93.523, + "args": { + "External id": 457854,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267450775562.688, "dur": 2.970, + "args": { + "External id": 457855,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450775564.018, "dur": 1.461, + "args": { + "External id": 457856,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450775626.218, "dur": 8.147, + "args": { + "External id": 457857,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450775638.406, "dur": 1.630, + "args": { + "External id": 457858,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450775640.854, "dur": 1.911, + "args": { + "External id": 457859,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450775684.969, "dur": 2.765, + "args": { + "External id": 457860,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450775685.693, "dur": 1.865, + "args": { + "External id": 457861,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338707, "tid": 2379456, + "ts": 6267450775717.638, "dur": 160.319, + "args": { + "External id": 457862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267450775724.242, "dur": 13.438, + "args": { + "External id": 457863,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450775730.776, "dur": 5.948, + "args": { + "External id": 457864,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450775739.457, "dur": 6.552, + "args": { + "External id": 457865,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450775744.484, "dur": 0.688, + "args": { + "External id": 457866,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267450775747.723, "dur": 2.184, + "args": { + "External id": 457867,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450775748.794, "dur": 0.662, + "args": { + "External id": 457868,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450775753.445, "dur": 2.006, + "args": { + "External id": 457869,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450775754.292, "dur": 0.571, + "args": { + "External id": 457870,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450775762.781, "dur": 1.830, + "args": { + "External id": 457871,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450775763.713, "dur": 0.527, + "args": { + "External id": 457872,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450775765.402, "dur": 8.792, + "args": { + "External id": 457873,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450775771.688, "dur": 2.310, + "args": { + "External id": 457874,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450775778.324, "dur": 4.579, + "args": { + "External id": 457875,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450775781.896, "dur": 0.662, + "args": { + "External id": 457876,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450775783.718, "dur": 4.090, + "args": { + "External id": 457877,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450775784.441, "dur": 3.257, + "args": { + "External id": 457878,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267450775789.310, "dur": 71.771, + "args": { + "External id": 457879,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450775865.791, "dur": 1.463, + "args": { + "External id": 457880,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450775868.397, "dur": 4.766, + "args": { + "External id": 457881,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450775872.001, "dur": 0.538, + "args": { + "External id": 457882,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450775875.785, "dur": 0.871, + "args": { + "External id": 457883,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450775931.075, "dur": 10.267, + "args": { + "External id": 457884,"Record function id": 0, "Ev Idx": 667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450775933.644, "dur": 6.783, + "args": { + "External id": 457885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450775936.194, "dur": 3.210, + "args": { + "External id": 457886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450775937.199, "dur": 2.075, + "args": { + "External id": 457887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450775946.082, "dur": 9.269, + "args": { + "External id": 457888,"Record function id": 0, "Sequence number": 5036521, "Fwd thread id": 1, "Ev Idx": 671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450775947.218, "dur": 4.383, + "args": { + "External id": 457889,"Sequence number": 5036521, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 672 + } + }, + { + "ph": "f", "id": 69, "pid": 2338707, "tid": 2379456, "ts": 6267450775947.218, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450775949.468, "dur": 1.846, + "args": { + "External id": 457890,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450775950.319, "dur": 0.824, + "args": { + "External id": 457891,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450775959.502, "dur": 193.211, + "args": { + "External id": 457892,"Record function id": 0, "Sequence number": 5036520, "Fwd thread id": 1, "Ev Idx": 675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450775960.687, "dur": 181.566, + "args": { + "External id": 457893,"Sequence number": 5036520, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 676 + } + }, + { + "ph": "f", "id": 70, "pid": 2338707, "tid": 2379456, "ts": 6267450775960.687, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450775969.106, "dur": 4.046, + "args": { + "External id": 457894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450775970.476, "dur": 1.976, + "args": { + "External id": 457895,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450775971.647, "dur": 0.605, + "args": { + "External id": 457896,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450775974.537, "dur": 49.704, + "args": { + "External id": 457897,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450776025.694, "dur": 9.682, + "args": { + "External id": 457898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450776026.913, "dur": 7.547, + "args": { + "External id": 457899,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450776033.066, "dur": 1.203, + "args": { + "External id": 457900,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450776037.394, "dur": 9.982, + "args": { + "External id": 457901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450776042.945, "dur": 3.849, + "args": { + "External id": 457902,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450776043.795, "dur": 2.918, + "args": { + "External id": 457903,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450776048.119, "dur": 92.425, + "args": { + "External id": 457904,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776162.347, "dur": 13.065, + "args": { + "External id": 457905,"Record function id": 0, "Sequence number": 5036519, "Fwd thread id": 1, "Ev Idx": 688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776163.761, "dur": 9.751, + "args": { + "External id": 457906,"Sequence number": 5036519, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 689 + } + }, + { + "ph": "f", "id": 71, "pid": 2338707, "tid": 2379456, "ts": 6267450776163.761, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450776166.011, "dur": 7.310, + "args": { + "External id": 457907,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450776171.396, "dur": 1.752, + "args": { + "External id": 457908,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776179.997, "dur": 10.777, + "args": { + "External id": 457909,"Record function id": 0, "Sequence number": 5036518, "Fwd thread id": 1, "Ev Idx": 692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776181.548, "dur": 5.446, + "args": { + "External id": 457910,"Sequence number": 5036518, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 693 + } + }, + { + "ph": "f", "id": 72, "pid": 2338707, "tid": 2379456, "ts": 6267450776181.548, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450776182.673, "dur": 4.068, + "args": { + "External id": 457911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450776183.462, "dur": 2.612, + "args": { + "External id": 457912,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450776184.993, "dur": 0.953, + "args": { + "External id": 457913,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450776195.712, "dur": 7.678, + "args": { + "External id": 457914,"Record function id": 0, "Ev Idx": 697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450776197.503, "dur": 5.150, + "args": { + "External id": 457915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450776199.187, "dur": 2.919, + "args": { + "External id": 457916,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450776200.140, "dur": 1.847, + "args": { + "External id": 457917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776207.408, "dur": 12.208, + "args": { + "External id": 457918,"Record function id": 0, "Sequence number": 5036517, "Fwd thread id": 1, "Ev Idx": 701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776208.565, "dur": 8.401, + "args": { + "External id": 457919,"Sequence number": 5036517, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 702 + } + }, + { + "ph": "f", "id": 73, "pid": 2338707, "tid": 2379456, "ts": 6267450776208.565, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450776212.429, "dur": 4.319, + "args": { + "External id": 457920,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450776215.846, "dur": 0.754, + "args": { + "External id": 457921,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776223.581, "dur": 119.031, + "args": { + "External id": 457922,"Record function id": 0, "Sequence number": 5036516, "Fwd thread id": 1, "Ev Idx": 705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776224.780, "dur": 107.592, + "args": { + "External id": 457923,"Sequence number": 5036516, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 706 + } + }, + { + "ph": "f", "id": 74, "pid": 2338707, "tid": 2379456, "ts": 6267450776224.780, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450776228.358, "dur": 5.258, + "args": { + "External id": 457924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450776229.102, "dur": 3.985, + "args": { + "External id": 457925,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450776232.500, "dur": 0.448, + "args": { + "External id": 457926,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450776234.551, "dur": 42.020, + "args": { + "External id": 457927,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450776277.860, "dur": 6.095, + "args": { + "External id": 457928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450776278.532, "dur": 4.885, + "args": { + "External id": 457929,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450776279.486, "dur": 3.774, + "args": { + "External id": 457930,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450776285.779, "dur": 8.631, + "args": { + "External id": 457931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450776287.045, "dur": 6.660, + "args": { + "External id": 457932,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450776293.087, "dur": 0.536, + "args": { + "External id": 457933,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450776294.903, "dur": 36.689, + "args": { + "External id": 457934,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776348.268, "dur": 36.099, + "args": { + "External id": 457935,"Record function id": 0, "Sequence number": 5036515, "Fwd thread id": 1, "Ev Idx": 718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776349.643, "dur": 3.780, + "args": { + "External id": 457936,"Sequence number": 5036515, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 719 + } + }, + { + "ph": "f", "id": 75, "pid": 2338707, "tid": 2379456, "ts": 6267450776349.643, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450776351.444, "dur": 1.803, + "args": { + "External id": 457937,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450776352.007, "dur": 1.116, + "args": { + "External id": 457938,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267450776356.835, "dur": 23.932, + "args": { + "External id": 457939,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776389.245, "dur": 11.261, + "args": { + "External id": 457940,"Record function id": 0, "Sequence number": 5036514, "Fwd thread id": 1, "Ev Idx": 723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776390.400, "dur": 7.411, + "args": { + "External id": 457941,"Sequence number": 5036514, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 724 + } + }, + { + "ph": "f", "id": 76, "pid": 2338707, "tid": 2379456, "ts": 6267450776390.400, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450776391.308, "dur": 6.270, + "args": { + "External id": 457942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450776394.923, "dur": 2.056, + "args": { + "External id": 457943,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450776396.388, "dur": 0.454, + "args": { + "External id": 457944,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450776407.734, "dur": 6.389, + "args": { + "External id": 457945,"Record function id": 0, "Ev Idx": 728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450776409.454, "dur": 4.065, + "args": { + "External id": 457946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450776410.639, "dur": 2.401, + "args": { + "External id": 457947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450776411.600, "dur": 1.342, + "args": { + "External id": 457948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450776421.727, "dur": 536.505, + "args": { + "External id": 457949,"Record function id": 0, "Sequence number": 5036513, "Fwd thread id": 1, "Ev Idx": 732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450776423.273, "dur": 489.551, + "args": { + "External id": 457950,"Sequence number": 5036513, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 733 + } + }, + { + "ph": "f", "id": 77, "pid": 2338707, "tid": 2379456, "ts": 6267450776423.273, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2379456, + "ts": 6267450776452.097, "dur": 38.217, + "args": { + "External id": 457951,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267450776454.081, "dur": 35.990, + "args": { + "External id": 457952,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450776457.167, "dur": 7.057, + "args": { + "External id": 457953,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450776459.791, "dur": 3.779, + "args": { + "External id": 457954,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450776465.920, "dur": 23.638, + "args": { + "External id": 457955,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450776505.493, "dur": 9.454, + "args": { + "External id": 457956,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450776510.914, "dur": 3.841, + "args": { + "External id": 457957,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450776519.229, "dur": 2.088, + "args": { + "External id": 457958,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450776519.880, "dur": 1.334, + "args": { + "External id": 457959,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450776536.114, "dur": 3.217, + "args": { + "External id": 457960,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450776554.827, "dur": 2.819, + "args": { + "External id": 457961,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450776777.129, "dur": 2.979, + "args": { + "External id": 457962,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450776785.031, "dur": 43.990, + "args": { + "External id": 457963,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450776801.382, "dur": 1.079, + "args": { + "External id": 457964,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450776836.235, "dur": 32.382, + "args": { + "External id": 457965,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450776837.964, "dur": 30.426, + "args": { + "External id": 457966,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450776842.495, "dur": 5.450, + "args": { + "External id": 457967,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450776849.904, "dur": 17.669, + "args": { + "External id": 457968,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267450776876.970, "dur": 5.362, + "args": { + "External id": 457969,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450776880.698, "dur": 1.467, + "args": { + "External id": 457970,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450776891.198, "dur": 4.498, + "args": { + "External id": 457971,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450776891.826, "dur": 3.762, + "args": { + "External id": 457972,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450776897.986, "dur": 4.379, + "args": { + "External id": 457973,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450776901.185, "dur": 1.072, + "args": { + "External id": 457974,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450776936.403, "dur": 19.882, + "args": { + "External id": 457975,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450776973.472, "dur": 9.752, + "args": { + "External id": 457976,"Record function id": 0, "Ev Idx": 759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450776975.742, "dur": 6.737, + "args": { + "External id": 457977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450776978.073, "dur": 3.388, + "args": { + "External id": 457978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450776979.178, "dur": 2.152, + "args": { + "External id": 457979,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776987.795, "dur": 9.120, + "args": { + "External id": 457980,"Record function id": 0, "Sequence number": 5036512, "Fwd thread id": 1, "Ev Idx": 763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450776989.022, "dur": 4.426, + "args": { + "External id": 457981,"Sequence number": 5036512, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 764 + } + }, + { + "ph": "f", "id": 78, "pid": 2338707, "tid": 2379456, "ts": 6267450776989.022, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450776990.922, "dur": 2.265, + "args": { + "External id": 457982,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450776991.764, "dur": 1.308, + "args": { + "External id": 457983,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450777003.682, "dur": 219.227, + "args": { + "External id": 457984,"Record function id": 0, "Sequence number": 5036511, "Fwd thread id": 1, "Ev Idx": 767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450777004.886, "dur": 205.108, + "args": { + "External id": 457985,"Sequence number": 5036511, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 768 + } + }, + { + "ph": "f", "id": 79, "pid": 2338707, "tid": 2379456, "ts": 6267450777004.886, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450777009.623, "dur": 5.250, + "args": { + "External id": 457986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450777011.299, "dur": 2.842, + "args": { + "External id": 457987,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450777012.984, "dur": 0.931, + "args": { + "External id": 457988,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450777016.382, "dur": 122.828, + "args": { + "External id": 457989,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450777142.205, "dur": 10.788, + "args": { + "External id": 457990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450777143.376, "dur": 8.653, + "args": { + "External id": 457991,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450777150.339, "dur": 1.510, + "args": { + "External id": 457992,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450777155.256, "dur": 4.750, + "args": { + "External id": 457993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450777157.608, "dur": 1.733, + "args": { + "External id": 457994,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450777158.643, "dur": 0.619, + "args": { + "External id": 457995,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450777160.593, "dur": 48.641, + "args": { + "External id": 457996,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450777231.436, "dur": 14.103, + "args": { + "External id": 457997,"Record function id": 0, "Sequence number": 5036510, "Fwd thread id": 1, "Ev Idx": 780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450777232.575, "dur": 9.331, + "args": { + "External id": 457998,"Sequence number": 5036510, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 781 + } + }, + { + "ph": "f", "id": 80, "pid": 2338707, "tid": 2379456, "ts": 6267450777232.575, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450777234.281, "dur": 7.429, + "args": { + "External id": 457999,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450777237.423, "dur": 4.139, + "args": { + "External id": 458000,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450777251.798, "dur": 11.185, + "args": { + "External id": 458001,"Record function id": 0, "Sequence number": 5036509, "Fwd thread id": 1, "Ev Idx": 784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450777255.467, "dur": 5.176, + "args": { + "External id": 458002,"Sequence number": 5036509, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 785 + } + }, + { + "ph": "f", "id": 81, "pid": 2338707, "tid": 2379456, "ts": 6267450777255.467, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450777256.956, "dur": 3.415, + "args": { + "External id": 458003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450777257.994, "dur": 1.649, + "args": { + "External id": 458004,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450777259.065, "dur": 0.385, + "args": { + "External id": 458005,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450777268.200, "dur": 7.716, + "args": { + "External id": 458006,"Record function id": 0, "Ev Idx": 789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450777269.969, "dur": 5.287, + "args": { + "External id": 458007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450777271.706, "dur": 3.043, + "args": { + "External id": 458008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450777272.612, "dur": 2.007, + "args": { + "External id": 458009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450777279.619, "dur": 10.886, + "args": { + "External id": 458010,"Record function id": 0, "Sequence number": 5036508, "Fwd thread id": 1, "Ev Idx": 793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450777280.856, "dur": 6.939, + "args": { + "External id": 458011,"Sequence number": 5036508, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 794 + } + }, + { + "ph": "f", "id": 82, "pid": 2338707, "tid": 2379456, "ts": 6267450777280.856, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450777285.279, "dur": 2.317, + "args": { + "External id": 458012,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450777285.958, "dur": 1.477, + "args": { + "External id": 458013,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450777296.098, "dur": 460.962, + "args": { + "External id": 458014,"Record function id": 0, "Sequence number": 5036507, "Fwd thread id": 1, "Ev Idx": 797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450777300.231, "dur": 427.530, + "args": { + "External id": 458015,"Sequence number": 5036507, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 798 + } + }, + { + "ph": "f", "id": 83, "pid": 2338707, "tid": 2379456, "ts": 6267450777300.231, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450777323.165, "dur": 8.879, + "args": { + "External id": 458016,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450777326.202, "dur": 5.202, + "args": { + "External id": 458017,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450777334.577, "dur": 7.342, + "args": { + "External id": 458018,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450777337.932, "dur": 3.770, + "args": { + "External id": 458019,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450777343.460, "dur": 3.741, + "args": { + "External id": 458020,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450777344.527, "dur": 2.482, + "args": { + "External id": 458021,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450777383.522, "dur": 313.587, + "args": { + "External id": 458022,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450777477.235, "dur": 3.267, + "args": { + "External id": 458023,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450777484.927, "dur": 7.271, + "args": { + "External id": 458024,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450777493.684, "dur": 4.065, + "args": { + "External id": 458025,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450777501.582, "dur": 2.379, + "args": { + "External id": 458026,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450777553.932, "dur": 2.643, + "args": { + "External id": 458027,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450777554.967, "dur": 1.444, + "args": { + "External id": 458028,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450777558.649, "dur": 56.477, + "args": { + "External id": 458029,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450777569.475, "dur": 1.220, + "args": { + "External id": 458030,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450777617.863, "dur": 1.980, + "args": { + "External id": 458031,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450777618.678, "dur": 1.061, + "args": { + "External id": 458032,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450777620.877, "dur": 21.220, + "args": { + "External id": 458033,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450777622.824, "dur": 3.089, + "args": { + "External id": 458034,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450777712.787, "dur": 4.420, + "args": { + "External id": 458035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450777720.639, "dur": 0.706, + "args": { + "External id": 458036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450777723.586, "dur": 0.683, + "args": { + "External id": 458037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450777768.895, "dur": 278.402, + "args": { + "External id": 458038,"Record function id": 0, "Sequence number": 5036506, "Fwd thread id": 1, "Ev Idx": 821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450777770.662, "dur": 267.292, + "args": { + "External id": 458039,"Sequence number": 5036506, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 822 + } + }, + { + "ph": "f", "id": 84, "pid": 2338707, "tid": 2379456, "ts": 6267450777770.662, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267450777796.365, "dur": 54.556, + "args": { + "External id": 458040,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450777802.962, "dur": 5.352, + "args": { + "External id": 458041,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450777810.246, "dur": 39.852, + "args": { + "External id": 458042,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450777863.793, "dur": 5.395, + "args": { + "External id": 458043,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450777865.880, "dur": 2.950, + "args": { + "External id": 458044,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450778097.342, "dur": 236.324, + "args": { + "External id": 458045,"Record function id": 0, "Sequence number": 5036505, "Fwd thread id": 1, "Ev Idx": 828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450778101.326, "dur": 222.679, + "args": { + "External id": 458046,"Sequence number": 5036505, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 829 + } + }, + { + "ph": "f", "id": 85, "pid": 2338707, "tid": 2379456, "ts": 6267450778101.326, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267450778119.538, "dur": 67.874, + "args": { + "External id": 458047,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450778124.965, "dur": 8.412, + "args": { + "External id": 458048,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450778135.040, "dur": 51.625, + "args": { + "External id": 458049,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450778196.801, "dur": 4.704, + "args": { + "External id": 458050,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450778198.307, "dur": 2.872, + "args": { + "External id": 458051,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778344.624, "dur": 18.051, + "args": { + "External id": 458052,"Record function id": 0, "Sequence number": 5036504, "Fwd thread id": 1, "Ev Idx": 835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778346.551, "dur": 11.726, + "args": { + "External id": 458053,"Sequence number": 5036504, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 836 + } + }, + { + "ph": "f", "id": 86, "pid": 2338707, "tid": 2379456, "ts": 6267450778346.551, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450778349.289, "dur": 8.693, + "args": { + "External id": 458054,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450778352.615, "dur": 5.119, + "args": { + "External id": 458055,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778367.287, "dur": 7.819, + "args": { + "External id": 458056,"Record function id": 0, "Sequence number": 5036503, "Fwd thread id": 1, "Ev Idx": 839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778368.324, "dur": 4.244, + "args": { + "External id": 458057,"Sequence number": 5036503, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 840 + } + }, + { + "ph": "f", "id": 87, "pid": 2338707, "tid": 2379456, "ts": 6267450778368.324, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450778370.174, "dur": 2.221, + "args": { + "External id": 458058,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450778371.055, "dur": 1.141, + "args": { + "External id": 458059,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778381.326, "dur": 12.776, + "args": { + "External id": 458060,"Record function id": 0, "Sequence number": 5036502, "Fwd thread id": 1, "Ev Idx": 843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778382.087, "dur": 8.372, + "args": { + "External id": 458061,"Sequence number": 5036502, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 844 + } + }, + { + "ph": "f", "id": 88, "pid": 2338707, "tid": 2379456, "ts": 6267450778382.087, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450778386.516, "dur": 3.744, + "args": { + "External id": 458062,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450778389.230, "dur": 0.879, + "args": { + "External id": 458063,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778398.203, "dur": 7.083, + "args": { + "External id": 458064,"Record function id": 0, "Sequence number": 5036501, "Fwd thread id": 1, "Ev Idx": 847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778399.204, "dur": 3.522, + "args": { + "External id": 458065,"Sequence number": 5036501, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 848 + } + }, + { + "ph": "f", "id": 89, "pid": 2338707, "tid": 2379456, "ts": 6267450778399.204, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450778400.710, "dur": 1.813, + "args": { + "External id": 458066,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450778401.355, "dur": 1.034, + "args": { + "External id": 458067,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778409.395, "dur": 217.243, + "args": { + "External id": 458068,"Record function id": 0, "Sequence number": 5036500, "Fwd thread id": 1, "Ev Idx": 851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778410.446, "dur": 204.998, + "args": { + "External id": 458069,"Sequence number": 5036500, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 852 + } + }, + { + "ph": "f", "id": 90, "pid": 2338707, "tid": 2379456, "ts": 6267450778410.446, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450778416.113, "dur": 10.256, + "args": { + "External id": 458070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450778421.076, "dur": 4.462, + "args": { + "External id": 458071,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450778423.470, "dur": 1.717, + "args": { + "External id": 458072,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450778428.725, "dur": 87.837, + "args": { + "External id": 458073,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450778518.412, "dur": 10.541, + "args": { + "External id": 458074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450778519.537, "dur": 8.388, + "args": { + "External id": 458075,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450778523.565, "dur": 4.137, + "args": { + "External id": 458076,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450778533.826, "dur": 3.520, + "args": { + "External id": 458077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450778535.391, "dur": 1.300, + "args": { + "External id": 458078,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450778536.088, "dur": 0.525, + "args": { + "External id": 458079,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450778538.389, "dur": 75.442, + "args": { + "External id": 458080,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778635.820, "dur": 12.242, + "args": { + "External id": 458081,"Record function id": 0, "Sequence number": 5036499, "Fwd thread id": 1, "Ev Idx": 864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778637.427, "dur": 8.328, + "args": { + "External id": 458082,"Sequence number": 5036499, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 865 + } + }, + { + "ph": "f", "id": 91, "pid": 2338707, "tid": 2379456, "ts": 6267450778637.427, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450778639.574, "dur": 6.009, + "args": { + "External id": 458083,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450778643.387, "dur": 2.037, + "args": { + "External id": 458084,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778655.243, "dur": 9.481, + "args": { + "External id": 458085,"Record function id": 0, "Sequence number": 5036498, "Fwd thread id": 1, "Ev Idx": 868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778656.480, "dur": 5.283, + "args": { + "External id": 458086,"Sequence number": 5036498, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 869 + } + }, + { + "ph": "f", "id": 92, "pid": 2338707, "tid": 2379456, "ts": 6267450778656.480, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450778657.608, "dur": 3.885, + "args": { + "External id": 458087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450778658.667, "dur": 2.225, + "args": { + "External id": 458088,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450778659.961, "dur": 0.791, + "args": { + "External id": 458089,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450778671.916, "dur": 11.639, + "args": { + "External id": 458090,"Record function id": 0, "Ev Idx": 873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450778673.640, "dur": 8.957, + "args": { + "External id": 458091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450778676.686, "dur": 5.333, + "args": { + "External id": 458092,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450778678.137, "dur": 3.654, + "args": { + "External id": 458093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778687.363, "dur": 12.402, + "args": { + "External id": 458094,"Record function id": 0, "Sequence number": 5036497, "Fwd thread id": 1, "Ev Idx": 877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778688.687, "dur": 8.406, + "args": { + "External id": 458095,"Sequence number": 5036497, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 878 + } + }, + { + "ph": "f", "id": 93, "pid": 2338707, "tid": 2379456, "ts": 6267450778688.687, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450778692.437, "dur": 4.465, + "args": { + "External id": 458096,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450778695.631, "dur": 1.082, + "args": { + "External id": 458097,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778703.613, "dur": 137.354, + "args": { + "External id": 458098,"Record function id": 0, "Sequence number": 5036496, "Fwd thread id": 1, "Ev Idx": 881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778704.593, "dur": 126.032, + "args": { + "External id": 458099,"Sequence number": 5036496, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 882 + } + }, + { + "ph": "f", "id": 94, "pid": 2338707, "tid": 2379456, "ts": 6267450778704.593, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450778708.107, "dur": 5.817, + "args": { + "External id": 458100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450778708.974, "dur": 4.426, + "args": { + "External id": 458101,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450778712.340, "dur": 0.882, + "args": { + "External id": 458102,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450778714.806, "dur": 43.031, + "args": { + "External id": 458103,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450778759.140, "dur": 11.611, + "args": { + "External id": 458104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450778765.897, "dur": 4.303, + "args": { + "External id": 458105,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450778767.165, "dur": 2.862, + "args": { + "External id": 458106,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450778772.740, "dur": 8.265, + "args": { + "External id": 458107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450778774.012, "dur": 6.282, + "args": { + "External id": 458108,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450778779.638, "dur": 0.485, + "args": { + "External id": 458109,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450778781.556, "dur": 47.893, + "args": { + "External id": 458110,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778847.188, "dur": 43.487, + "args": { + "External id": 458111,"Record function id": 0, "Sequence number": 5036495, "Fwd thread id": 1, "Ev Idx": 894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778848.263, "dur": 4.343, + "args": { + "External id": 458112,"Sequence number": 5036495, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 895 + } + }, + { + "ph": "f", "id": 95, "pid": 2338707, "tid": 2379456, "ts": 6267450778848.263, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450778850.330, "dur": 2.080, + "args": { + "External id": 458113,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450778850.924, "dur": 1.316, + "args": { + "External id": 458114,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267450778859.881, "dur": 27.820, + "args": { + "External id": 458115,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778898.152, "dur": 18.571, + "args": { + "External id": 458116,"Record function id": 0, "Sequence number": 5036494, "Fwd thread id": 1, "Ev Idx": 899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778899.057, "dur": 15.023, + "args": { + "External id": 458117,"Sequence number": 5036494, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 900 + } + }, + { + "ph": "f", "id": 96, "pid": 2338707, "tid": 2379456, "ts": 6267450778899.057, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450778902.627, "dur": 11.143, + "args": { + "External id": 458118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450778903.778, "dur": 9.223, + "args": { + "External id": 458119,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450778912.234, "dur": 0.635, + "args": { + "External id": 458120,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450778921.636, "dur": 8.931, + "args": { + "External id": 458121,"Record function id": 0, "Ev Idx": 904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450778923.145, "dur": 6.750, + "args": { + "External id": 458122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450778924.573, "dur": 4.799, + "args": { + "External id": 458123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450778925.234, "dur": 4.018, + "args": { + "External id": 458124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778936.889, "dur": 10.417, + "args": { + "External id": 458125,"Record function id": 0, "Sequence number": 5036493, "Fwd thread id": 1, "Ev Idx": 908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778938.327, "dur": 5.931, + "args": { + "External id": 458126,"Sequence number": 5036493, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 909 + } + }, + { + "ph": "f", "id": 97, "pid": 2338707, "tid": 2379456, "ts": 6267450778938.327, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450778939.894, "dur": 4.160, + "args": { + "External id": 458127,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450778942.741, "dur": 1.210, + "args": { + "External id": 458128,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778951.295, "dur": 166.741, + "args": { + "External id": 458129,"Record function id": 0, "Sequence number": 5036492, "Fwd thread id": 1, "Ev Idx": 912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450778952.249, "dur": 152.088, + "args": { + "External id": 458130,"Sequence number": 5036492, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 913 + } + }, + { + "ph": "f", "id": 98, "pid": 2338707, "tid": 2379456, "ts": 6267450778952.249, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450778955.157, "dur": 2.740, + "args": { + "External id": 458131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450778955.854, "dur": 1.515, + "args": { + "External id": 458132,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450778956.680, "dur": 0.485, + "args": { + "External id": 458133,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450778958.789, "dur": 45.332, + "args": { + "External id": 458134,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450779007.809, "dur": 6.148, + "args": { + "External id": 458135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450779008.619, "dur": 4.720, + "args": { + "External id": 458136,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450779012.509, "dur": 0.655, + "args": { + "External id": 458137,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450779015.683, "dur": 3.894, + "args": { + "External id": 458138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450779017.387, "dur": 1.352, + "args": { + "External id": 458139,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450779018.095, "dur": 0.563, + "args": { + "External id": 458140,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450779022.701, "dur": 79.533, + "args": { + "External id": 458141,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450779126.558, "dur": 39.044, + "args": { + "External id": 458142,"Record function id": 0, "Sequence number": 5036491, "Fwd thread id": 1, "Ev Idx": 925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450779127.896, "dur": 8.172, + "args": { + "External id": 458143,"Sequence number": 5036491, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 926 + } + }, + { + "ph": "f", "id": 99, "pid": 2338707, "tid": 2379456, "ts": 6267450779127.896, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450779129.928, "dur": 5.909, + "args": { + "External id": 458144,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450779133.745, "dur": 1.923, + "args": { + "External id": 458145,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450779139.364, "dur": 23.437, + "args": { + "External id": 458146,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450779169.949, "dur": 14.099, + "args": { + "External id": 458147,"Record function id": 0, "Sequence number": 5036490, "Fwd thread id": 1, "Ev Idx": 930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450779171.241, "dur": 10.744, + "args": { + "External id": 458148,"Sequence number": 5036490, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 931 + } + }, + { + "ph": "f", "id": 100, "pid": 2338707, "tid": 2379456, "ts": 6267450779171.241, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450779172.813, "dur": 8.913, + "args": { + "External id": 458149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450779173.597, "dur": 7.415, + "args": { + "External id": 458150,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450779177.383, "dur": 3.462, + "args": { + "External id": 458151,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450779188.984, "dur": 7.134, + "args": { + "External id": 458152,"Record function id": 0, "Ev Idx": 935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450779190.654, "dur": 4.794, + "args": { + "External id": 458153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450779192.329, "dur": 2.589, + "args": { + "External id": 458154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450779193.148, "dur": 1.544, + "args": { + "External id": 458155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450779201.203, "dur": 472.209, + "args": { + "External id": 458156,"Record function id": 0, "Sequence number": 5036489, "Fwd thread id": 1, "Ev Idx": 939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450779202.708, "dur": 428.981, + "args": { + "External id": 458157,"Sequence number": 5036489, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 940 + } + }, + { + "ph": "f", "id": 101, "pid": 2338707, "tid": 2379456, "ts": 6267450779202.708, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450779248.560, "dur": 2.017, + "args": { + "External id": 458158,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450779249.095, "dur": 1.273, + "args": { + "External id": 458159,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450779268.805, "dur": 6.072, + "args": { + "External id": 458160,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450779289.055, "dur": 2.927, + "args": { + "External id": 458161,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450779476.624, "dur": 2.378, + "args": { + "External id": 458162,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450779483.905, "dur": 44.071, + "args": { + "External id": 458163,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450779498.669, "dur": 1.042, + "args": { + "External id": 458164,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450779534.683, "dur": 37.663, + "args": { + "External id": 458165,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450779536.755, "dur": 35.348, + "args": { + "External id": 458166,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450779541.294, "dur": 7.005, + "args": { + "External id": 458167,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450779550.343, "dur": 21.190, + "args": { + "External id": 458168,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267450779581.299, "dur": 6.352, + "args": { + "External id": 458169,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450779582.496, "dur": 4.980, + "args": { + "External id": 458170,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450779618.737, "dur": 2.995, + "args": { + "External id": 458171,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450779619.456, "dur": 1.926, + "args": { + "External id": 458172,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450779646.509, "dur": 21.273, + "args": { + "External id": 458173,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450779687.667, "dur": 12.808, + "args": { + "External id": 458174,"Record function id": 0, "Ev Idx": 957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450779690.230, "dur": 9.423, + "args": { + "External id": 458175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450779692.580, "dur": 5.783, + "args": { + "External id": 458176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450779696.202, "dur": 2.013, + "args": { + "External id": 458177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450779704.994, "dur": 7.896, + "args": { + "External id": 458178,"Record function id": 0, "Sequence number": 5036488, "Fwd thread id": 1, "Ev Idx": 961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450779706.453, "dur": 1.769, + "args": { + "External id": 458179,"Sequence number": 5036488, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 962 + } + }, + { + "ph": "f", "id": 102, "pid": 2338707, "tid": 2379456, "ts": 6267450779706.453, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450779717.436, "dur": 574.835, + "args": { + "External id": 458180,"Record function id": 0, "Sequence number": 5036487, "Fwd thread id": 1, "Ev Idx": 963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450779718.761, "dur": 556.412, + "args": { + "External id": 458181,"Sequence number": 5036487, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 964 + } + }, + { + "ph": "f", "id": 103, "pid": 2338707, "tid": 2379456, "ts": 6267450779718.761, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450779754.766, "dur": 11.604, + "args": { + "External id": 458182,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450779762.051, "dur": 4.019, + "args": { + "External id": 458183,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450779770.617, "dur": 8.323, + "args": { + "External id": 458184,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450779775.057, "dur": 2.906, + "args": { + "External id": 458185,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450779776.706, "dur": 1.007, + "args": { + "External id": 458186,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2379456, + "ts": 6267450779782.875, "dur": 121.384, + "args": { + "External id": 458187,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450779783.895, "dur": 3.030, + "args": { + "External id": 458188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450779784.843, "dur": 1.560, + "args": { + "External id": 458189,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450779785.730, "dur": 0.552, + "args": { + "External id": 458190,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2379456, + "ts": 6267450779791.498, "dur": 112.195, + "args": { + "External id": 458191,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450779793.253, "dur": 109.443, + "args": { + "External id": 458192,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267450779909.722, "dur": 5.334, + "args": { + "External id": 458193,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450779913.348, "dur": 1.535, + "args": { + "External id": 458194,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450779954.187, "dur": 6.555, + "args": { + "External id": 458195,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450779961.831, "dur": 5.276, + "args": { + "External id": 458196,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450779968.076, "dur": 1.974, + "args": { + "External id": 458197,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450780014.937, "dur": 2.445, + "args": { + "External id": 458198,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450780015.895, "dur": 1.310, + "args": { + "External id": 458199,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338707, "tid": 2379456, + "ts": 6267450780042.830, "dur": 207.306, + "args": { + "External id": 458200,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267450780049.151, "dur": 5.711, + "args": { + "External id": 458201,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780053.024, "dur": 0.973, + "args": { + "External id": 458202,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450780099.894, "dur": 7.585, + "args": { + "External id": 458203,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780105.233, "dur": 0.835, + "args": { + "External id": 458204,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267450780109.631, "dur": 4.929, + "args": { + "External id": 458205,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780113.224, "dur": 0.919, + "args": { + "External id": 458206,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450780115.715, "dur": 5.056, + "args": { + "External id": 458207,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780119.407, "dur": 0.712, + "args": { + "External id": 458208,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450780127.515, "dur": 4.244, + "args": { + "External id": 458209,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780130.506, "dur": 0.877, + "args": { + "External id": 458210,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450780132.947, "dur": 10.697, + "args": { + "External id": 458211,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450780138.280, "dur": 5.127, + "args": { + "External id": 458212,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450780144.568, "dur": 2.092, + "args": { + "External id": 458213,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780145.449, "dur": 0.844, + "args": { + "External id": 458214,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450780149.096, "dur": 4.908, + "args": { + "External id": 458215,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450780149.815, "dur": 4.075, + "args": { + "External id": 458216,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267450780155.463, "dur": 78.081, + "args": { + "External id": 458217,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 1000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450780237.380, "dur": 1.503, + "args": { + "External id": 458218,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 1001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450780239.524, "dur": 5.309, + "args": { + "External id": 458219,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 1002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780243.461, "dur": 0.661, + "args": { + "External id": 458220,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 1003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450780247.934, "dur": 0.976, + "args": { + "External id": 458221,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 1004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450780307.543, "dur": 11.047, + "args": { + "External id": 458222,"Record function id": 0, "Ev Idx": 1005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450780310.340, "dur": 7.362, + "args": { + "External id": 458223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450780313.245, "dur": 3.516, + "args": { + "External id": 458224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450780314.132, "dur": 2.453, + "args": { + "External id": 458225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780323.365, "dur": 13.293, + "args": { + "External id": 458226,"Record function id": 0, "Sequence number": 5036486, "Fwd thread id": 1, "Ev Idx": 1009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780324.693, "dur": 8.689, + "args": { + "External id": 458227,"Sequence number": 5036486, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1010 + } + }, + { + "ph": "f", "id": 104, "pid": 2338707, "tid": 2379456, "ts": 6267450780324.693, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450780326.543, "dur": 6.536, + "args": { + "External id": 458228,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450780332.065, "dur": 0.855, + "args": { + "External id": 458229,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780340.826, "dur": 170.019, + "args": { + "External id": 458230,"Record function id": 0, "Sequence number": 5036485, "Fwd thread id": 1, "Ev Idx": 1013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780341.930, "dur": 159.648, + "args": { + "External id": 458231,"Sequence number": 5036485, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1014 + } + }, + { + "ph": "f", "id": 105, "pid": 2338707, "tid": 2379456, "ts": 6267450780341.930, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450780346.118, "dur": 6.580, + "args": { + "External id": 458232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450780347.653, "dur": 4.376, + "args": { + "External id": 458233,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780349.035, "dur": 2.829, + "args": { + "External id": 458234,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450780356.475, "dur": 54.831, + "args": { + "External id": 458235,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450780413.096, "dur": 4.627, + "args": { + "External id": 458236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450780413.805, "dur": 2.995, + "args": { + "External id": 458237,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780414.894, "dur": 1.726, + "args": { + "External id": 458238,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450780419.652, "dur": 37.122, + "args": { + "External id": 458239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450780420.749, "dur": 35.429, + "args": { + "External id": 458240,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780455.670, "dur": 0.396, + "args": { + "External id": 458241,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450780457.711, "dur": 42.835, + "args": { + "External id": 458242,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780519.313, "dur": 6.545, + "args": { + "External id": 458243,"Record function id": 0, "Sequence number": 5036484, "Fwd thread id": 1, "Ev Idx": 1026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780520.410, "dur": 3.489, + "args": { + "External id": 458244,"Sequence number": 5036484, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1027 + } + }, + { + "ph": "f", "id": 106, "pid": 2338707, "tid": 2379456, "ts": 6267450780520.410, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450780521.761, "dur": 1.941, + "args": { + "External id": 458245,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450780522.378, "dur": 1.182, + "args": { + "External id": 458246,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780530.024, "dur": 10.821, + "args": { + "External id": 458247,"Record function id": 0, "Sequence number": 5036483, "Fwd thread id": 1, "Ev Idx": 1030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780531.013, "dur": 6.986, + "args": { + "External id": 458248,"Sequence number": 5036483, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1031 + } + }, + { + "ph": "f", "id": 107, "pid": 2338707, "tid": 2379456, "ts": 6267450780531.013, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450780531.912, "dur": 5.816, + "args": { + "External id": 458249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450780532.633, "dur": 4.477, + "args": { + "External id": 458250,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780536.539, "dur": 0.441, + "args": { + "External id": 458251,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450780547.378, "dur": 8.499, + "args": { + "External id": 458252,"Record function id": 0, "Ev Idx": 1035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450780548.790, "dur": 6.381, + "args": { + "External id": 458253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450780550.026, "dur": 4.644, + "args": { + "External id": 458254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450780550.772, "dur": 3.770, + "args": { + "External id": 458255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780559.504, "dur": 7.586, + "args": { + "External id": 458256,"Record function id": 0, "Sequence number": 5036482, "Fwd thread id": 1, "Ev Idx": 1039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780560.375, "dur": 3.551, + "args": { + "External id": 458257,"Sequence number": 5036482, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1040 + } + }, + { + "ph": "f", "id": 108, "pid": 2338707, "tid": 2379456, "ts": 6267450780560.375, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450780561.904, "dur": 1.834, + "args": { + "External id": 458258,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450780562.404, "dur": 1.181, + "args": { + "External id": 458259,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780570.760, "dur": 184.146, + "args": { + "External id": 458260,"Record function id": 0, "Sequence number": 5036481, "Fwd thread id": 1, "Ev Idx": 1043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780571.677, "dur": 169.756, + "args": { + "External id": 458261,"Sequence number": 5036481, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1044 + } + }, + { + "ph": "f", "id": 109, "pid": 2338707, "tid": 2379456, "ts": 6267450780571.677, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450780577.079, "dur": 2.714, + "args": { + "External id": 458262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450780577.683, "dur": 1.622, + "args": { + "External id": 458263,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780578.649, "dur": 0.532, + "args": { + "External id": 458264,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450780580.556, "dur": 87.552, + "args": { + "External id": 458265,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450780670.889, "dur": 7.197, + "args": { + "External id": 458266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450780671.734, "dur": 5.557, + "args": { + "External id": 458267,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780676.169, "dur": 0.972, + "args": { + "External id": 458268,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450780679.652, "dur": 3.768, + "args": { + "External id": 458269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450780680.992, "dur": 1.651, + "args": { + "External id": 458270,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780681.930, "dur": 0.633, + "args": { + "External id": 458271,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450780684.150, "dur": 56.393, + "args": { + "External id": 458272,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780762.271, "dur": 46.784, + "args": { + "External id": 458273,"Record function id": 0, "Sequence number": 5036480, "Fwd thread id": 1, "Ev Idx": 1056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780763.477, "dur": 8.266, + "args": { + "External id": 458274,"Sequence number": 5036480, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1057 + } + }, + { + "ph": "f", "id": 110, "pid": 2338707, "tid": 2379456, "ts": 6267450780763.477, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450780765.157, "dur": 6.398, + "args": { + "External id": 458275,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450780770.157, "dur": 1.279, + "args": { + "External id": 458276,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267450780775.721, "dur": 29.787, + "args": { + "External id": 458277,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780813.703, "dur": 8.626, + "args": { + "External id": 458278,"Record function id": 0, "Sequence number": 5036479, "Fwd thread id": 1, "Ev Idx": 1061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450780814.843, "dur": 4.776, + "args": { + "External id": 458279,"Sequence number": 5036479, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1062 + } + }, + { + "ph": "f", "id": 111, "pid": 2338707, "tid": 2379456, "ts": 6267450780814.843, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450780815.924, "dur": 3.441, + "args": { + "External id": 458280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450780816.759, "dur": 2.057, + "args": { + "External id": 458281,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450780817.983, "dur": 0.652, + "args": { + "External id": 458282,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450780827.179, "dur": 9.293, + "args": { + "External id": 458283,"Record function id": 0, "Ev Idx": 1066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450780828.547, "dur": 7.298, + "args": { + "External id": 458284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450780830.128, "dur": 5.148, + "args": { + "External id": 458285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450780833.623, "dur": 1.472, + "args": { + "External id": 458286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450780841.256, "dur": 573.089, + "args": { + "External id": 458287,"Record function id": 0, "Sequence number": 5036478, "Fwd thread id": 1, "Ev Idx": 1070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450780842.886, "dur": 524.570, + "args": { + "External id": 458288,"Sequence number": 5036478, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 1071 + } + }, + { + "ph": "f", "id": 112, "pid": 2338707, "tid": 2379456, "ts": 6267450780842.886, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2379456, + "ts": 6267450780872.338, "dur": 42.212, + "args": { + "External id": 458289,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267450780874.303, "dur": 40.000, + "args": { + "External id": 458290,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450780878.876, "dur": 7.030, + "args": { + "External id": 458291,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 1074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450780881.761, "dur": 3.527, + "args": { + "External id": 458292,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450780887.773, "dur": 25.973, + "args": { + "External id": 458293,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450780929.000, "dur": 2.610, + "args": { + "External id": 458294,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450780930.047, "dur": 1.401, + "args": { + "External id": 458295,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450780936.123, "dur": 3.670, + "args": { + "External id": 458296,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450780938.841, "dur": 0.845, + "args": { + "External id": 458297,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450780955.690, "dur": 3.126, + "args": { + "External id": 458298,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450780973.452, "dur": 2.511, + "args": { + "External id": 458299,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450781223.407, "dur": 6.107, + "args": { + "External id": 458300,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450781235.089, "dur": 47.812, + "args": { + "External id": 458301,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450781252.229, "dur": 1.251, + "args": { + "External id": 458302,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450781290.001, "dur": 38.838, + "args": { + "External id": 458303,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450781294.671, "dur": 33.869, + "args": { + "External id": 458304,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450781299.409, "dur": 7.289, + "args": { + "External id": 458305,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450781308.578, "dur": 19.263, + "args": { + "External id": 458306,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267450781333.981, "dur": 2.763, + "args": { + "External id": 458307,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450781335.357, "dur": 1.206, + "args": { + "External id": 458308,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450781345.311, "dur": 6.209, + "args": { + "External id": 458309,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450781349.914, "dur": 1.504, + "args": { + "External id": 458310,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450781353.694, "dur": 2.355, + "args": { + "External id": 458311,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450781354.281, "dur": 1.662, + "args": { + "External id": 458312,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450781390.425, "dur": 21.996, + "args": { + "External id": 458313,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450781430.310, "dur": 10.761, + "args": { + "External id": 458314,"Record function id": 0, "Ev Idx": 1097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450781433.153, "dur": 7.008, + "args": { + "External id": 458315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450781435.704, "dur": 3.397, + "args": { + "External id": 458316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450781437.031, "dur": 1.929, + "args": { + "External id": 458317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450781445.995, "dur": 13.124, + "args": { + "External id": 458318,"Record function id": 0, "Sequence number": 5036477, "Fwd thread id": 1, "Ev Idx": 1101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450781447.282, "dur": 8.059, + "args": { + "External id": 458319,"Sequence number": 5036477, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1102 + } + }, + { + "ph": "f", "id": 113, "pid": 2338707, "tid": 2379456, "ts": 6267450781447.282, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450781450.473, "dur": 4.606, + "args": { + "External id": 458320,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450781453.758, "dur": 1.150, + "args": { + "External id": 458321,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450781463.343, "dur": 246.233, + "args": { + "External id": 458322,"Record function id": 0, "Sequence number": 5036476, "Fwd thread id": 1, "Ev Idx": 1105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450781464.563, "dur": 232.860, + "args": { + "External id": 458323,"Sequence number": 5036476, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1106 + } + }, + { + "ph": "f", "id": 114, "pid": 2338707, "tid": 2379456, "ts": 6267450781464.563, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450781469.728, "dur": 5.725, + "args": { + "External id": 458324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450781471.210, "dur": 3.510, + "args": { + "External id": 458325,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450781473.562, "dur": 0.899, + "args": { + "External id": 458326,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450781479.476, "dur": 99.652, + "args": { + "External id": 458327,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450781581.153, "dur": 4.863, + "args": { + "External id": 458328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450781581.683, "dur": 3.596, + "args": { + "External id": 458329,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450781583.872, "dur": 1.255, + "args": { + "External id": 458330,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450781587.815, "dur": 38.342, + "args": { + "External id": 458331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450781615.782, "dur": 9.365, + "args": { + "External id": 458332,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450781621.374, "dur": 3.489, + "args": { + "External id": 458333,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450781626.868, "dur": 68.973, + "args": { + "External id": 458334,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450781720.298, "dur": 9.996, + "args": { + "External id": 458335,"Record function id": 0, "Sequence number": 5036475, "Fwd thread id": 1, "Ev Idx": 1118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450781721.771, "dur": 5.859, + "args": { + "External id": 458336,"Sequence number": 5036475, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1119 + } + }, + { + "ph": "f", "id": 115, "pid": 2338707, "tid": 2379456, "ts": 6267450781721.771, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450781724.195, "dur": 3.252, + "args": { + "External id": 458337,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450781725.623, "dur": 1.688, + "args": { + "External id": 458338,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450781734.390, "dur": 12.398, + "args": { + "External id": 458339,"Record function id": 0, "Sequence number": 5036474, "Fwd thread id": 1, "Ev Idx": 1122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450781736.003, "dur": 7.903, + "args": { + "External id": 458340,"Sequence number": 5036474, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1123 + } + }, + { + "ph": "f", "id": 116, "pid": 2338707, "tid": 2379456, "ts": 6267450781736.003, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450781736.877, "dur": 6.762, + "args": { + "External id": 458341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450781737.916, "dur": 5.086, + "args": { + "External id": 458342,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450781742.071, "dur": 0.767, + "args": { + "External id": 458343,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450781752.063, "dur": 8.386, + "args": { + "External id": 458344,"Record function id": 0, "Ev Idx": 1127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450781754.318, "dur": 5.382, + "args": { + "External id": 458345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450781756.330, "dur": 2.837, + "args": { + "External id": 458346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450781757.403, "dur": 1.615, + "args": { + "External id": 458347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450781764.403, "dur": 8.760, + "args": { + "External id": 458348,"Record function id": 0, "Sequence number": 5036473, "Fwd thread id": 1, "Ev Idx": 1131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450781765.589, "dur": 4.350, + "args": { + "External id": 458349,"Sequence number": 5036473, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1132 + } + }, + { + "ph": "f", "id": 117, "pid": 2338707, "tid": 2379456, "ts": 6267450781765.589, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450781767.330, "dur": 2.416, + "args": { + "External id": 458350,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450781768.281, "dur": 1.313, + "args": { + "External id": 458351,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450781779.158, "dur": 514.957, + "args": { + "External id": 458352,"Record function id": 0, "Sequence number": 5036472, "Fwd thread id": 1, "Ev Idx": 1135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450781781.300, "dur": 483.897, + "args": { + "External id": 458353,"Sequence number": 5036472, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1136 + } + }, + { + "ph": "f", "id": 118, "pid": 2338707, "tid": 2379456, "ts": 6267450781781.300, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450781802.651, "dur": 8.557, + "args": { + "External id": 458354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450781806.147, "dur": 4.545, + "args": { + "External id": 458355,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450781813.804, "dur": 6.478, + "args": { + "External id": 458356,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450781818.240, "dur": 1.789, + "args": { + "External id": 458357,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450781824.803, "dur": 8.887, + "args": { + "External id": 458358,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450781826.770, "dur": 6.688, + "args": { + "External id": 458359,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450781865.326, "dur": 367.673, + "args": { + "External id": 458360,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450781962.841, "dur": 6.128, + "args": { + "External id": 458361,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450781971.579, "dur": 2.633, + "args": { + "External id": 458362,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450781975.383, "dur": 2.306, + "args": { + "External id": 458363,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450781979.095, "dur": 3.963, + "args": { + "External id": 458364,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450782100.006, "dur": 3.899, + "args": { + "External id": 458365,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450782101.278, "dur": 2.265, + "args": { + "External id": 458366,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450782109.129, "dur": 38.682, + "args": { + "External id": 458367,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450782116.261, "dur": 1.139, + "args": { + "External id": 458368,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450782151.487, "dur": 1.917, + "args": { + "External id": 458369,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450782152.390, "dur": 0.916, + "args": { + "External id": 458370,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450782154.671, "dur": 20.949, + "args": { + "External id": 458371,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450782160.070, "dur": 0.772, + "args": { + "External id": 458372,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450782249.527, "dur": 4.918, + "args": { + "External id": 458373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450782258.290, "dur": 0.930, + "args": { + "External id": 458374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450782261.206, "dur": 0.658, + "args": { + "External id": 458375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450782308.964, "dur": 325.805, + "args": { + "External id": 458376,"Record function id": 0, "Sequence number": 5036471, "Fwd thread id": 1, "Ev Idx": 1159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450782311.753, "dur": 310.777, + "args": { + "External id": 458377,"Sequence number": 5036471, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1160 + } + }, + { + "ph": "f", "id": 119, "pid": 2338707, "tid": 2379456, "ts": 6267450782311.753, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267450782338.157, "dur": 63.676, + "args": { + "External id": 458378,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450782342.374, "dur": 8.290, + "args": { + "External id": 458379,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450782352.620, "dur": 48.301, + "args": { + "External id": 458380,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 1163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450782415.115, "dur": 5.962, + "args": { + "External id": 458381,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450782417.554, "dur": 3.069, + "args": { + "External id": 458382,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450782647.937, "dur": 216.070, + "args": { + "External id": 458383,"Record function id": 0, "Sequence number": 5036470, "Fwd thread id": 1, "Ev Idx": 1166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450782651.045, "dur": 203.839, + "args": { + "External id": 458384,"Sequence number": 5036470, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1167 + } + }, + { + "ph": "f", "id": 120, "pid": 2338707, "tid": 2379456, "ts": 6267450782651.045, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267450782667.810, "dur": 57.204, + "args": { + "External id": 458385,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450782671.844, "dur": 4.638, + "args": { + "External id": 458386,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450782677.716, "dur": 46.728, + "args": { + "External id": 458387,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 1170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450782734.402, "dur": 6.280, + "args": { + "External id": 458388,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450782736.692, "dur": 3.651, + "args": { + "External id": 458389,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450782872.277, "dur": 17.996, + "args": { + "External id": 458390,"Record function id": 0, "Sequence number": 5036469, "Fwd thread id": 1, "Ev Idx": 1173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450782874.172, "dur": 12.113, + "args": { + "External id": 458391,"Sequence number": 5036469, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1174 + } + }, + { + "ph": "f", "id": 121, "pid": 2338707, "tid": 2379456, "ts": 6267450782874.172, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450782877.754, "dur": 8.020, + "args": { + "External id": 458392,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450782879.465, "dur": 6.034, + "args": { + "External id": 458393,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450782894.648, "dur": 12.324, + "args": { + "External id": 458394,"Record function id": 0, "Sequence number": 5036468, "Fwd thread id": 1, "Ev Idx": 1177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450782898.619, "dur": 5.233, + "args": { + "External id": 458395,"Sequence number": 5036468, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1178 + } + }, + { + "ph": "f", "id": 122, "pid": 2338707, "tid": 2379456, "ts": 6267450782898.619, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450782900.892, "dur": 2.793, + "args": { + "External id": 458396,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450782902.373, "dur": 1.204, + "args": { + "External id": 458397,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450782910.912, "dur": 9.320, + "args": { + "External id": 458398,"Record function id": 0, "Sequence number": 5036467, "Fwd thread id": 1, "Ev Idx": 1181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450782912.299, "dur": 5.472, + "args": { + "External id": 458399,"Sequence number": 5036467, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1182 + } + }, + { + "ph": "f", "id": 123, "pid": 2338707, "tid": 2379456, "ts": 6267450782912.299, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450782914.447, "dur": 3.130, + "args": { + "External id": 458400,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450782915.926, "dur": 1.541, + "args": { + "External id": 458401,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450782924.417, "dur": 10.880, + "args": { + "External id": 458402,"Record function id": 0, "Sequence number": 5036466, "Fwd thread id": 1, "Ev Idx": 1185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450782926.110, "dur": 6.477, + "args": { + "External id": 458403,"Sequence number": 5036466, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1186 + } + }, + { + "ph": "f", "id": 124, "pid": 2338707, "tid": 2379456, "ts": 6267450782926.110, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450782927.673, "dur": 4.725, + "args": { + "External id": 458404,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450782931.202, "dur": 1.094, + "args": { + "External id": 458405,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450782942.446, "dur": 272.887, + "args": { + "External id": 458406,"Record function id": 0, "Sequence number": 5036465, "Fwd thread id": 1, "Ev Idx": 1189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450782943.725, "dur": 259.023, + "args": { + "External id": 458407,"Sequence number": 5036465, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1190 + } + }, + { + "ph": "f", "id": 125, "pid": 2338707, "tid": 2379456, "ts": 6267450782943.725, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450782949.265, "dur": 11.444, + "args": { + "External id": 458408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450782951.728, "dur": 8.001, + "args": { + "External id": 458409,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450782954.711, "dur": 4.652, + "args": { + "External id": 458410,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450782963.125, "dur": 136.241, + "args": { + "External id": 458411,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450783102.650, "dur": 19.045, + "args": { + "External id": 458412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450783112.886, "dur": 7.474, + "args": { + "External id": 458413,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450783116.570, "dur": 3.516, + "args": { + "External id": 458414,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450783124.449, "dur": 5.559, + "args": { + "External id": 458415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450783126.691, "dur": 2.446, + "args": { + "External id": 458416,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450783128.280, "dur": 0.714, + "args": { + "External id": 458417,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450783130.927, "dur": 70.598, + "args": { + "External id": 458418,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783225.205, "dur": 12.891, + "args": { + "External id": 458419,"Record function id": 0, "Sequence number": 5036464, "Fwd thread id": 1, "Ev Idx": 1202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783226.676, "dur": 8.915, + "args": { + "External id": 458420,"Sequence number": 5036464, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1203 + } + }, + { + "ph": "f", "id": 126, "pid": 2338707, "tid": 2379456, "ts": 6267450783226.676, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450783229.586, "dur": 5.847, + "args": { + "External id": 458421,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450783233.197, "dur": 2.060, + "args": { + "External id": 458422,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783242.113, "dur": 10.514, + "args": { + "External id": 458423,"Record function id": 0, "Sequence number": 5036463, "Fwd thread id": 1, "Ev Idx": 1206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783243.044, "dur": 6.021, + "args": { + "External id": 458424,"Sequence number": 5036463, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1207 + } + }, + { + "ph": "f", "id": 127, "pid": 2338707, "tid": 2379456, "ts": 6267450783243.044, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450783244.216, "dur": 4.596, + "args": { + "External id": 458425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450783244.953, "dur": 3.242, + "args": { + "External id": 458426,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450783247.244, "dur": 0.821, + "args": { + "External id": 458427,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450783260.288, "dur": 18.129, + "args": { + "External id": 458428,"Record function id": 0, "Ev Idx": 1211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450783263.065, "dur": 14.295, + "args": { + "External id": 458429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450783266.485, "dur": 10.361, + "args": { + "External id": 458430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450783270.444, "dur": 6.236, + "args": { + "External id": 458431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783282.948, "dur": 8.434, + "args": { + "External id": 458432,"Record function id": 0, "Sequence number": 5036462, "Fwd thread id": 1, "Ev Idx": 1215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783284.773, "dur": 3.717, + "args": { + "External id": 458433,"Sequence number": 5036462, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1216 + } + }, + { + "ph": "f", "id": 128, "pid": 2338707, "tid": 2379456, "ts": 6267450783284.773, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450783286.099, "dur": 2.214, + "args": { + "External id": 458434,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450783287.031, "dur": 1.180, + "args": { + "External id": 458435,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783295.207, "dur": 117.680, + "args": { + "External id": 458436,"Record function id": 0, "Sequence number": 5036461, "Fwd thread id": 1, "Ev Idx": 1219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783296.313, "dur": 107.918, + "args": { + "External id": 458437,"Sequence number": 5036461, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1220 + } + }, + { + "ph": "f", "id": 129, "pid": 2338707, "tid": 2379456, "ts": 6267450783296.313, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450783300.484, "dur": 5.924, + "args": { + "External id": 458438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450783301.254, "dur": 4.610, + "args": { + "External id": 458439,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450783305.038, "dur": 0.704, + "args": { + "External id": 458440,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450783307.353, "dur": 37.355, + "args": { + "External id": 458441,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450783345.845, "dur": 4.892, + "args": { + "External id": 458442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450783346.720, "dur": 3.424, + "args": { + "External id": 458443,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450783348.617, "dur": 1.399, + "args": { + "External id": 458444,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450783352.179, "dur": 7.121, + "args": { + "External id": 458445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450783356.067, "dur": 2.556, + "args": { + "External id": 458446,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450783357.928, "dur": 0.612, + "args": { + "External id": 458447,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450783359.789, "dur": 43.440, + "args": { + "External id": 458448,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783418.246, "dur": 41.799, + "args": { + "External id": 458449,"Record function id": 0, "Sequence number": 5036460, "Fwd thread id": 1, "Ev Idx": 1232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783419.522, "dur": 4.869, + "args": { + "External id": 458450,"Sequence number": 5036460, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1233 + } + }, + { + "ph": "f", "id": 130, "pid": 2338707, "tid": 2379456, "ts": 6267450783419.522, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450783421.111, "dur": 3.093, + "args": { + "External id": 458451,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450783422.939, "dur": 1.078, + "args": { + "External id": 458452,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267450783428.223, "dur": 28.934, + "args": { + "External id": 458453,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783464.729, "dur": 14.158, + "args": { + "External id": 458454,"Record function id": 0, "Sequence number": 5036459, "Fwd thread id": 1, "Ev Idx": 1237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783469.521, "dur": 6.547, + "args": { + "External id": 458455,"Sequence number": 5036459, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1238 + } + }, + { + "ph": "f", "id": 131, "pid": 2338707, "tid": 2379456, "ts": 6267450783469.521, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450783471.234, "dur": 4.590, + "args": { + "External id": 458456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450783472.458, "dur": 2.771, + "args": { + "External id": 458457,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450783474.114, "dur": 0.937, + "args": { + "External id": 458458,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450783483.560, "dur": 6.897, + "args": { + "External id": 458459,"Record function id": 0, "Ev Idx": 1242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450783486.069, "dur": 3.670, + "args": { + "External id": 458460,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450783487.349, "dur": 1.923, + "args": { + "External id": 458461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450783487.952, "dur": 1.120, + "args": { + "External id": 458462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783494.462, "dur": 11.197, + "args": { + "External id": 458463,"Record function id": 0, "Sequence number": 5036458, "Fwd thread id": 1, "Ev Idx": 1246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783497.021, "dur": 6.138, + "args": { + "External id": 458464,"Sequence number": 5036458, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1247 + } + }, + { + "ph": "f", "id": 132, "pid": 2338707, "tid": 2379456, "ts": 6267450783497.021, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450783498.139, "dur": 4.819, + "args": { + "External id": 458465,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450783501.580, "dur": 1.265, + "args": { + "External id": 458466,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783509.547, "dur": 147.643, + "args": { + "External id": 458467,"Record function id": 0, "Sequence number": 5036457, "Fwd thread id": 1, "Ev Idx": 1250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783510.655, "dur": 134.289, + "args": { + "External id": 458468,"Sequence number": 5036457, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1251 + } + }, + { + "ph": "f", "id": 133, "pid": 2338707, "tid": 2379456, "ts": 6267450783510.655, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450783513.971, "dur": 3.563, + "args": { + "External id": 458469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450783514.697, "dur": 2.290, + "args": { + "External id": 458470,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450783516.424, "dur": 0.443, + "args": { + "External id": 458471,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450783521.127, "dur": 44.785, + "args": { + "External id": 458472,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450783567.101, "dur": 4.571, + "args": { + "External id": 458473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450783567.621, "dur": 3.343, + "args": { + "External id": 458474,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450783569.773, "dur": 1.048, + "args": { + "External id": 458475,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450783573.197, "dur": 6.856, + "args": { + "External id": 458476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450783574.690, "dur": 4.707, + "args": { + "External id": 458477,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450783578.914, "dur": 0.402, + "args": { + "External id": 458478,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450783580.589, "dur": 62.830, + "args": { + "External id": 458479,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783665.327, "dur": 35.026, + "args": { + "External id": 458480,"Record function id": 0, "Sequence number": 5036456, "Fwd thread id": 1, "Ev Idx": 1263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783666.774, "dur": 6.040, + "args": { + "External id": 458481,"Sequence number": 5036456, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1264 + } + }, + { + "ph": "f", "id": 134, "pid": 2338707, "tid": 2379456, "ts": 6267450783666.774, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450783669.386, "dur": 3.233, + "args": { + "External id": 458482,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450783670.625, "dur": 1.806, + "args": { + "External id": 458483,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450783676.045, "dur": 21.583, + "args": { + "External id": 458484,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783704.894, "dur": 12.886, + "args": { + "External id": 458485,"Record function id": 0, "Sequence number": 5036455, "Fwd thread id": 1, "Ev Idx": 1268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450783705.886, "dur": 9.136, + "args": { + "External id": 458486,"Sequence number": 5036455, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1269 + } + }, + { + "ph": "f", "id": 135, "pid": 2338707, "tid": 2379456, "ts": 6267450783705.886, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450783707.088, "dur": 7.648, + "args": { + "External id": 458487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450783708.374, "dur": 5.741, + "args": { + "External id": 458488,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450783712.912, "dur": 1.027, + "args": { + "External id": 458489,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450783722.929, "dur": 8.375, + "args": { + "External id": 458490,"Record function id": 0, "Ev Idx": 1273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450783725.288, "dur": 5.363, + "args": { + "External id": 458491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450783726.971, "dur": 3.052, + "args": { + "External id": 458492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450783727.814, "dur": 2.113, + "args": { + "External id": 458493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450783740.109, "dur": 492.101, + "args": { + "External id": 458494,"Record function id": 0, "Sequence number": 5036454, "Fwd thread id": 1, "Ev Idx": 1277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450783741.790, "dur": 450.735, + "args": { + "External id": 458495,"Sequence number": 5036454, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1278 + } + }, + { + "ph": "f", "id": 136, "pid": 2338707, "tid": 2379456, "ts": 6267450783741.790, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450783784.282, "dur": 2.873, + "args": { + "External id": 458496,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450783785.742, "dur": 1.211, + "args": { + "External id": 458497,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450783809.207, "dur": 5.236, + "args": { + "External id": 458498,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450783826.397, "dur": 2.412, + "args": { + "External id": 458499,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450784011.551, "dur": 2.016, + "args": { + "External id": 458500,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450784018.370, "dur": 88.710, + "args": { + "External id": 458501,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784035.349, "dur": 1.006, + "args": { + "External id": 458502,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450784116.857, "dur": 42.807, + "args": { + "External id": 458503,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450784118.969, "dur": 40.450, + "args": { + "External id": 458504,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784125.083, "dur": 8.346, + "args": { + "External id": 458505,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450784135.504, "dur": 23.282, + "args": { + "External id": 458506,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267450784168.194, "dur": 3.675, + "args": { + "External id": 458507,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450784169.991, "dur": 1.761, + "args": { + "External id": 458508,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450784180.791, "dur": 3.104, + "args": { + "External id": 458509,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450784182.369, "dur": 1.379, + "args": { + "External id": 458510,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450784207.594, "dur": 19.785, + "args": { + "External id": 458511,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450784247.451, "dur": 14.586, + "args": { + "External id": 458512,"Record function id": 0, "Ev Idx": 1295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450784250.459, "dur": 10.570, + "args": { + "External id": 458513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450784253.181, "dur": 6.705, + "args": { + "External id": 458514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450784257.358, "dur": 2.411, + "args": { + "External id": 458515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450784266.890, "dur": 7.478, + "args": { + "External id": 458516,"Record function id": 0, "Sequence number": 5036453, "Fwd thread id": 1, "Ev Idx": 1299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450784268.425, "dur": 1.871, + "args": { + "External id": 458517,"Sequence number": 5036453, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1300 + } + }, + { + "ph": "f", "id": 137, "pid": 2338707, "tid": 2379456, "ts": 6267450784268.425, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450784280.080, "dur": 546.210, + "args": { + "External id": 458518,"Record function id": 0, "Sequence number": 5036452, "Fwd thread id": 1, "Ev Idx": 1301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450784281.934, "dur": 528.451, + "args": { + "External id": 458519,"Sequence number": 5036452, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1302 + } + }, + { + "ph": "f", "id": 138, "pid": 2338707, "tid": 2379456, "ts": 6267450784281.934, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450784320.506, "dur": 10.717, + "args": { + "External id": 458520,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450784327.066, "dur": 3.841, + "args": { + "External id": 458521,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 1304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450784335.238, "dur": 8.862, + "args": { + "External id": 458522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450784339.337, "dur": 3.976, + "args": { + "External id": 458523,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784342.040, "dur": 1.096, + "args": { + "External id": 458524,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2379456, + "ts": 6267450784348.097, "dur": 114.022, + "args": { + "External id": 458525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 1308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450784349.101, "dur": 4.239, + "args": { + "External id": 458526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 1309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450784349.917, "dur": 2.820, + "args": { + "External id": 458527,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784351.885, "dur": 0.742, + "args": { + "External id": 458528,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2379456, + "ts": 6267450784357.151, "dur": 104.197, + "args": { + "External id": 458529,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 1312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450784358.831, "dur": 101.516, + "args": { + "External id": 458530,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 1313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267450784467.301, "dur": 3.659, + "args": { + "External id": 458531,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 1314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450784469.168, "dur": 1.639, + "args": { + "External id": 458532,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 1315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450784512.388, "dur": 7.110, + "args": { + "External id": 458533,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450784521.306, "dur": 4.843, + "args": { + "External id": 458534,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450784527.684, "dur": 2.228, + "args": { + "External id": 458535,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450784572.072, "dur": 2.964, + "args": { + "External id": 458536,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450784573.297, "dur": 1.498, + "args": { + "External id": 458537,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338707, "tid": 2379456, + "ts": 6267450784620.033, "dur": 166.342, + "args": { + "External id": 458538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 1321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267450784626.417, "dur": 8.234, + "args": { + "External id": 458539,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784631.800, "dur": 1.459, + "args": { + "External id": 458540,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450784636.676, "dur": 8.208, + "args": { + "External id": 458541,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 1324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784642.981, "dur": 0.691, + "args": { + "External id": 458542,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 1325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267450784646.810, "dur": 3.827, + "args": { + "External id": 458543,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 1326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784649.537, "dur": 0.640, + "args": { + "External id": 458544,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450784651.820, "dur": 3.859, + "args": { + "External id": 458545,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784654.246, "dur": 0.638, + "args": { + "External id": 458546,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 1329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450784663.131, "dur": 6.933, + "args": { + "External id": 458547,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 1330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784666.128, "dur": 3.554, + "args": { + "External id": 458548,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 1331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450784671.533, "dur": 7.851, + "args": { + "External id": 458549,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 1332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450784676.543, "dur": 2.596, + "args": { + "External id": 458550,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 1333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450784680.770, "dur": 3.376, + "args": { + "External id": 458551,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 1334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784683.236, "dur": 0.528, + "args": { + "External id": 458552,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 1335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450784685.317, "dur": 6.140, + "args": { + "External id": 458553,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450784687.224, "dur": 4.112, + "args": { + "External id": 458554,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267450784692.904, "dur": 75.406, + "args": { + "External id": 458555,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 1338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450784771.077, "dur": 1.563, + "args": { + "External id": 458556,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 1339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267450784776.395, "dur": 4.535, + "args": { + "External id": 458557,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 1340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784779.164, "dur": 0.729, + "args": { + "External id": 458558,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 1341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450784784.117, "dur": 0.927, + "args": { + "External id": 458559,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 1342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450784840.805, "dur": 17.494, + "args": { + "External id": 458560,"Record function id": 0, "Ev Idx": 1343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450784843.731, "dur": 13.667, + "args": { + "External id": 458561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450784846.041, "dur": 10.480, + "args": { + "External id": 458562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450784850.924, "dur": 5.461, + "args": { + "External id": 458563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450784863.382, "dur": 12.984, + "args": { + "External id": 458564,"Record function id": 0, "Sequence number": 5036451, "Fwd thread id": 1, "Ev Idx": 1347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450784865.299, "dur": 7.559, + "args": { + "External id": 458565,"Sequence number": 5036451, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1348 + } + }, + { + "ph": "f", "id": 139, "pid": 2338707, "tid": 2379456, "ts": 6267450784865.299, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450784870.318, "dur": 2.251, + "args": { + "External id": 458566,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450784871.505, "dur": 0.907, + "args": { + "External id": 458567,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450784880.364, "dur": 138.016, + "args": { + "External id": 458568,"Record function id": 0, "Sequence number": 5036450, "Fwd thread id": 1, "Ev Idx": 1351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450784881.758, "dur": 128.022, + "args": { + "External id": 458569,"Sequence number": 5036450, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1352 + } + }, + { + "ph": "f", "id": 140, "pid": 2338707, "tid": 2379456, "ts": 6267450784881.758, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450784887.267, "dur": 8.691, + "args": { + "External id": 458570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450784888.871, "dur": 6.463, + "args": { + "External id": 458571,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784894.175, "dur": 1.003, + "args": { + "External id": 458572,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450784897.528, "dur": 53.552, + "args": { + "External id": 458573,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450784952.689, "dur": 4.582, + "args": { + "External id": 458574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450784953.516, "dur": 2.815, + "args": { + "External id": 458575,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784955.476, "dur": 0.693, + "args": { + "External id": 458576,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450784959.321, "dur": 7.291, + "args": { + "External id": 458577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450784961.148, "dur": 4.861, + "args": { + "External id": 458578,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450784964.962, "dur": 0.967, + "args": { + "External id": 458579,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450784967.286, "dur": 41.539, + "args": { + "External id": 458580,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785024.592, "dur": 8.733, + "args": { + "External id": 458581,"Record function id": 0, "Sequence number": 5036449, "Fwd thread id": 1, "Ev Idx": 1364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785025.873, "dur": 5.738, + "args": { + "External id": 458582,"Sequence number": 5036449, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1365 + } + }, + { + "ph": "f", "id": 141, "pid": 2338707, "tid": 2379456, "ts": 6267450785025.873, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450785028.202, "dur": 3.209, + "args": { + "External id": 458583,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450785029.985, "dur": 1.299, + "args": { + "External id": 458584,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785037.776, "dur": 13.626, + "args": { + "External id": 458585,"Record function id": 0, "Sequence number": 5036448, "Fwd thread id": 1, "Ev Idx": 1368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785038.906, "dur": 9.544, + "args": { + "External id": 458586,"Sequence number": 5036448, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1369 + } + }, + { + "ph": "f", "id": 142, "pid": 2338707, "tid": 2379456, "ts": 6267450785038.906, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450785039.916, "dur": 8.262, + "args": { + "External id": 458587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450785040.737, "dur": 6.797, + "args": { + "External id": 458588,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450785044.878, "dur": 2.522, + "args": { + "External id": 458589,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450785098.425, "dur": 10.780, + "args": { + "External id": 458590,"Record function id": 0, "Ev Idx": 1373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450785101.752, "dur": 6.480, + "args": { + "External id": 458591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450785104.022, "dur": 3.623, + "args": { + "External id": 458592,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450785105.242, "dur": 2.105, + "args": { + "External id": 458593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785114.871, "dur": 9.781, + "args": { + "External id": 458594,"Record function id": 0, "Sequence number": 5036447, "Fwd thread id": 1, "Ev Idx": 1377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785116.792, "dur": 4.746, + "args": { + "External id": 458595,"Sequence number": 5036447, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1378 + } + }, + { + "ph": "f", "id": 143, "pid": 2338707, "tid": 2379456, "ts": 6267450785116.792, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450785118.913, "dur": 2.398, + "args": { + "External id": 458596,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450785119.979, "dur": 1.160, + "args": { + "External id": 458597,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785128.471, "dur": 132.108, + "args": { + "External id": 458598,"Record function id": 0, "Sequence number": 5036446, "Fwd thread id": 1, "Ev Idx": 1381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785132.232, "dur": 116.913, + "args": { + "External id": 458599,"Sequence number": 5036446, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1382 + } + }, + { + "ph": "f", "id": 144, "pid": 2338707, "tid": 2379456, "ts": 6267450785132.232, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450785136.800, "dur": 4.006, + "args": { + "External id": 458600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450785137.756, "dur": 2.470, + "args": { + "External id": 458601,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450785139.404, "dur": 0.649, + "args": { + "External id": 458602,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450785141.898, "dur": 50.300, + "args": { + "External id": 458603,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450785193.439, "dur": 6.160, + "args": { + "External id": 458604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450785194.036, "dur": 4.969, + "args": { + "External id": 458605,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450785197.952, "dur": 0.911, + "args": { + "External id": 458606,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450785201.465, "dur": 5.053, + "args": { + "External id": 458607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450785203.059, "dur": 2.733, + "args": { + "External id": 458608,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450785204.973, "dur": 0.738, + "args": { + "External id": 458609,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450785207.004, "dur": 41.173, + "args": { + "External id": 458610,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785268.322, "dur": 44.981, + "args": { + "External id": 458611,"Record function id": 0, "Sequence number": 5036445, "Fwd thread id": 1, "Ev Idx": 1394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785269.548, "dur": 9.625, + "args": { + "External id": 458612,"Sequence number": 5036445, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1395 + } + }, + { + "ph": "f", "id": 145, "pid": 2338707, "tid": 2379456, "ts": 6267450785269.548, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450785274.252, "dur": 4.727, + "args": { + "External id": 458613,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450785275.521, "dur": 3.337, + "args": { + "External id": 458614,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267450785283.134, "dur": 26.218, + "args": { + "External id": 458615,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785317.988, "dur": 9.394, + "args": { + "External id": 458616,"Record function id": 0, "Sequence number": 5036444, "Fwd thread id": 1, "Ev Idx": 1399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785319.078, "dur": 6.027, + "args": { + "External id": 458617,"Sequence number": 5036444, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1400 + } + }, + { + "ph": "f", "id": 146, "pid": 2338707, "tid": 2379456, "ts": 6267450785319.078, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450785320.450, "dur": 4.402, + "args": { + "External id": 458618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450785321.264, "dur": 2.965, + "args": { + "External id": 458619,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450785323.405, "dur": 0.653, + "args": { + "External id": 458620,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450785332.393, "dur": 8.783, + "args": { + "External id": 458621,"Record function id": 0, "Ev Idx": 1404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450785334.486, "dur": 5.993, + "args": { + "External id": 458622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450785335.725, "dur": 4.333, + "args": { + "External id": 458623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450785338.665, "dur": 1.268, + "args": { + "External id": 458624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450785346.481, "dur": 537.696, + "args": { + "External id": 458625,"Record function id": 0, "Sequence number": 5036443, "Fwd thread id": 1, "Ev Idx": 1408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450785348.626, "dur": 489.479, + "args": { + "External id": 458626,"Sequence number": 5036443, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 1409 + } + }, + { + "ph": "f", "id": 147, "pid": 2338707, "tid": 2379456, "ts": 6267450785348.626, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2379456, + "ts": 6267450785377.213, "dur": 40.784, + "args": { + "External id": 458627,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267450785379.312, "dur": 38.400, + "args": { + "External id": 458628,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450785382.819, "dur": 7.687, + "args": { + "External id": 458629,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 1412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450785386.099, "dur": 3.773, + "args": { + "External id": 458630,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450785392.098, "dur": 24.976, + "args": { + "External id": 458631,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450785432.658, "dur": 2.757, + "args": { + "External id": 458632,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450785433.838, "dur": 1.386, + "args": { + "External id": 458633,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450785442.604, "dur": 2.034, + "args": { + "External id": 458634,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450785443.515, "dur": 1.009, + "args": { + "External id": 458635,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450785460.438, "dur": 3.471, + "args": { + "External id": 458636,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450785477.463, "dur": 4.394, + "args": { + "External id": 458637,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450785702.008, "dur": 3.741, + "args": { + "External id": 458638,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450785711.468, "dur": 44.009, + "args": { + "External id": 458639,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450785726.520, "dur": 1.329, + "args": { + "External id": 458640,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450785762.651, "dur": 34.333, + "args": { + "External id": 458641,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450785764.868, "dur": 31.862, + "args": { + "External id": 458642,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450785770.201, "dur": 4.746, + "args": { + "External id": 458643,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450785776.907, "dur": 19.129, + "args": { + "External id": 458644,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267450785802.700, "dur": 3.000, + "args": { + "External id": 458645,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450785804.458, "dur": 1.089, + "args": { + "External id": 458646,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450785817.833, "dur": 3.112, + "args": { + "External id": 458647,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450785819.420, "dur": 1.383, + "args": { + "External id": 458648,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450785823.660, "dur": 3.023, + "args": { + "External id": 458649,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450785825.025, "dur": 1.551, + "args": { + "External id": 458650,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450785861.785, "dur": 20.335, + "args": { + "External id": 458651,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450785900.762, "dur": 12.033, + "args": { + "External id": 458652,"Record function id": 0, "Ev Idx": 1435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450785904.057, "dur": 7.745, + "args": { + "External id": 458653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450785906.921, "dur": 3.740, + "args": { + "External id": 458654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450785908.145, "dur": 2.375, + "args": { + "External id": 458655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785917.926, "dur": 14.039, + "args": { + "External id": 458656,"Record function id": 0, "Sequence number": 5036442, "Fwd thread id": 1, "Ev Idx": 1439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785920.053, "dur": 8.390, + "args": { + "External id": 458657,"Sequence number": 5036442, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1440 + } + }, + { + "ph": "f", "id": 148, "pid": 2338707, "tid": 2379456, "ts": 6267450785920.053, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450785925.120, "dur": 3.023, + "args": { + "External id": 458658,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450785926.234, "dur": 1.702, + "args": { + "External id": 458659,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785936.414, "dur": 243.650, + "args": { + "External id": 458660,"Record function id": 0, "Sequence number": 5036441, "Fwd thread id": 1, "Ev Idx": 1443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450785937.681, "dur": 229.796, + "args": { + "External id": 458661,"Sequence number": 5036441, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1444 + } + }, + { + "ph": "f", "id": 149, "pid": 2338707, "tid": 2379456, "ts": 6267450785937.681, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450785942.712, "dur": 9.435, + "args": { + "External id": 458662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450785944.866, "dur": 6.438, + "args": { + "External id": 458663,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450785949.632, "dur": 1.400, + "args": { + "External id": 458664,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450785953.527, "dur": 87.995, + "args": { + "External id": 458665,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450786042.968, "dur": 55.610, + "args": { + "External id": 458666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450786043.914, "dur": 52.549, + "args": { + "External id": 458667,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450786046.408, "dur": 8.880, + "args": { + "External id": 458668,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450786101.706, "dur": 8.213, + "args": { + "External id": 458669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450786103.668, "dur": 5.427, + "args": { + "External id": 458670,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450786108.287, "dur": 0.653, + "args": { + "External id": 458671,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450786110.880, "dur": 55.629, + "args": { + "External id": 458672,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450786188.703, "dur": 10.002, + "args": { + "External id": 458673,"Record function id": 0, "Sequence number": 5036440, "Fwd thread id": 1, "Ev Idx": 1456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450786190.105, "dur": 5.875, + "args": { + "External id": 458674,"Sequence number": 5036440, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1457 + } + }, + { + "ph": "f", "id": 150, "pid": 2338707, "tid": 2379456, "ts": 6267450786190.105, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450786192.528, "dur": 3.250, + "args": { + "External id": 458675,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450786194.214, "dur": 1.400, + "args": { + "External id": 458676,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450786203.196, "dur": 12.871, + "args": { + "External id": 458677,"Record function id": 0, "Sequence number": 5036439, "Fwd thread id": 1, "Ev Idx": 1460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450786204.108, "dur": 8.413, + "args": { + "External id": 458678,"Sequence number": 5036439, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1461 + } + }, + { + "ph": "f", "id": 151, "pid": 2338707, "tid": 2379456, "ts": 6267450786204.108, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450786205.385, "dur": 6.866, + "args": { + "External id": 458679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450786208.714, "dur": 2.939, + "args": { + "External id": 458680,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450786210.507, "dur": 0.999, + "args": { + "External id": 458681,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450786221.441, "dur": 8.884, + "args": { + "External id": 458682,"Record function id": 0, "Ev Idx": 1465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450786223.392, "dur": 6.190, + "args": { + "External id": 458683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450786225.665, "dur": 3.417, + "args": { + "External id": 458684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450786226.782, "dur": 2.118, + "args": { + "External id": 458685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450786234.404, "dur": 9.355, + "args": { + "External id": 458686,"Record function id": 0, "Sequence number": 5036438, "Fwd thread id": 1, "Ev Idx": 1469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450786235.349, "dur": 5.223, + "args": { + "External id": 458687,"Sequence number": 5036438, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1470 + } + }, + { + "ph": "f", "id": 152, "pid": 2338707, "tid": 2379456, "ts": 6267450786235.349, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450786237.506, "dur": 2.856, + "args": { + "External id": 458688,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450786238.515, "dur": 1.644, + "args": { + "External id": 458689,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450786249.234, "dur": 446.763, + "args": { + "External id": 458690,"Record function id": 0, "Sequence number": 5036437, "Fwd thread id": 1, "Ev Idx": 1473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450786251.264, "dur": 416.840, + "args": { + "External id": 458691,"Sequence number": 5036437, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1474 + } + }, + { + "ph": "f", "id": 153, "pid": 2338707, "tid": 2379456, "ts": 6267450786251.264, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450786273.090, "dur": 8.350, + "args": { + "External id": 458692,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450786276.340, "dur": 4.562, + "args": { + "External id": 458693,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450786284.255, "dur": 6.743, + "args": { + "External id": 458694,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450786285.943, "dur": 4.741, + "args": { + "External id": 458695,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450786292.802, "dur": 4.609, + "args": { + "External id": 458696,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450786295.178, "dur": 1.947, + "args": { + "External id": 458697,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450786329.072, "dur": 304.596, + "args": { + "External id": 458698,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450786422.825, "dur": 6.282, + "args": { + "External id": 458699,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450786431.501, "dur": 4.724, + "args": { + "External id": 458700,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450786437.644, "dur": 1.951, + "args": { + "External id": 458701,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450786440.992, "dur": 2.204, + "args": { + "External id": 458702,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450786494.004, "dur": 4.691, + "args": { + "External id": 458703,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450786497.196, "dur": 1.373, + "args": { + "External id": 458704,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450786503.066, "dur": 32.669, + "args": { + "External id": 458705,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450786510.393, "dur": 0.783, + "args": { + "External id": 458706,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450786537.452, "dur": 2.159, + "args": { + "External id": 458707,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450786538.696, "dur": 0.813, + "args": { + "External id": 458708,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450786540.849, "dur": 19.149, + "args": { + "External id": 458709,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450786543.162, "dur": 3.033, + "args": { + "External id": 458710,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450786652.034, "dur": 4.776, + "args": { + "External id": 458711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450786660.516, "dur": 0.979, + "args": { + "External id": 458712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267450786663.652, "dur": 0.886, + "args": { + "External id": 458713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450786708.843, "dur": 276.988, + "args": { + "External id": 458714,"Record function id": 0, "Sequence number": 5036436, "Fwd thread id": 1, "Ev Idx": 1497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450786711.732, "dur": 265.748, + "args": { + "External id": 458715,"Sequence number": 5036436, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1498 + } + }, + { + "ph": "f", "id": 154, "pid": 2338707, "tid": 2379456, "ts": 6267450786711.732, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267450786737.570, "dur": 54.109, + "args": { + "External id": 458716,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450786741.703, "dur": 4.699, + "args": { + "External id": 458717,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450786748.267, "dur": 42.741, + "args": { + "External id": 458718,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 1501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450786804.849, "dur": 5.765, + "args": { + "External id": 458719,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450786807.023, "dur": 3.237, + "args": { + "External id": 458720,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450786994.105, "dur": 263.791, + "args": { + "External id": 458721,"Record function id": 0, "Sequence number": 5036435, "Fwd thread id": 1, "Ev Idx": 1504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450786997.191, "dur": 251.850, + "args": { + "External id": 458722,"Sequence number": 5036435, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1505 + } + }, + { + "ph": "f", "id": 155, "pid": 2338707, "tid": 2379456, "ts": 6267450786997.191, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267450787019.052, "dur": 89.305, + "args": { + "External id": 458723,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787022.366, "dur": 2.904, + "args": { + "External id": 458724,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450787026.444, "dur": 80.852, + "args": { + "External id": 458725,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 1508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267450787120.474, "dur": 9.874, + "args": { + "External id": 458726,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787122.991, "dur": 7.030, + "args": { + "External id": 458727,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787268.664, "dur": 23.273, + "args": { + "External id": 458728,"Record function id": 0, "Sequence number": 5036434, "Fwd thread id": 1, "Ev Idx": 1511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787270.664, "dur": 17.842, + "args": { + "External id": 458729,"Sequence number": 5036434, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1512 + } + }, + { + "ph": "f", "id": 156, "pid": 2338707, "tid": 2379456, "ts": 6267450787270.664, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450787274.043, "dur": 14.068, + "args": { + "External id": 458730,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450787279.057, "dur": 8.702, + "args": { + "External id": 458731,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787299.229, "dur": 7.913, + "args": { + "External id": 458732,"Record function id": 0, "Sequence number": 5036433, "Fwd thread id": 1, "Ev Idx": 1515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787300.486, "dur": 4.340, + "args": { + "External id": 458733,"Sequence number": 5036433, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1516 + } + }, + { + "ph": "f", "id": 157, "pid": 2338707, "tid": 2379456, "ts": 6267450787300.486, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450787302.282, "dur": 2.366, + "args": { + "External id": 458734,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450787303.331, "dur": 1.215, + "args": { + "External id": 458735,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787311.013, "dur": 7.944, + "args": { + "External id": 458736,"Record function id": 0, "Sequence number": 5036432, "Fwd thread id": 1, "Ev Idx": 1519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787312.538, "dur": 4.433, + "args": { + "External id": 458737,"Sequence number": 5036432, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1520 + } + }, + { + "ph": "f", "id": 158, "pid": 2338707, "tid": 2379456, "ts": 6267450787312.538, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450787314.144, "dur": 2.641, + "args": { + "External id": 458738,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450787315.693, "dur": 0.952, + "args": { + "External id": 458739,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787323.349, "dur": 11.518, + "args": { + "External id": 458740,"Record function id": 0, "Sequence number": 5036431, "Fwd thread id": 1, "Ev Idx": 1523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787325.328, "dur": 6.219, + "args": { + "External id": 458741,"Sequence number": 5036431, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1524 + } + }, + { + "ph": "f", "id": 159, "pid": 2338707, "tid": 2379456, "ts": 6267450787325.328, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450787326.645, "dur": 4.713, + "args": { + "External id": 458742,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450787330.271, "dur": 0.967, + "args": { + "External id": 458743,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787339.208, "dur": 239.099, + "args": { + "External id": 458744,"Record function id": 0, "Sequence number": 5036430, "Fwd thread id": 1, "Ev Idx": 1527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787340.122, "dur": 227.309, + "args": { + "External id": 458745,"Sequence number": 5036430, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1528 + } + }, + { + "ph": "f", "id": 160, "pid": 2338707, "tid": 2379456, "ts": 6267450787340.122, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450787345.769, "dur": 8.644, + "args": { + "External id": 458746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450787348.270, "dur": 5.215, + "args": { + "External id": 458747,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787351.405, "dur": 1.776, + "args": { + "External id": 458748,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450787359.056, "dur": 89.198, + "args": { + "External id": 458749,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450787450.368, "dur": 5.927, + "args": { + "External id": 458750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450787451.453, "dur": 3.732, + "args": { + "External id": 458751,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787453.901, "dur": 1.079, + "args": { + "External id": 458752,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450787489.148, "dur": 15.317, + "args": { + "External id": 458753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450787490.661, "dur": 13.195, + "args": { + "External id": 458754,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787503.326, "dur": 0.438, + "args": { + "External id": 458755,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450787505.216, "dur": 60.994, + "args": { + "External id": 458756,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787584.372, "dur": 27.622, + "args": { + "External id": 458757,"Record function id": 0, "Sequence number": 5036429, "Fwd thread id": 1, "Ev Idx": 1540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787585.745, "dur": 22.247, + "args": { + "External id": 458758,"Sequence number": 5036429, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1541 + } + }, + { + "ph": "f", "id": 161, "pid": 2338707, "tid": 2379456, "ts": 6267450787585.745, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450787588.543, "dur": 2.882, + "args": { + "External id": 458759,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450787589.908, "dur": 1.352, + "args": { + "External id": 458760,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787618.946, "dur": 14.077, + "args": { + "External id": 458761,"Record function id": 0, "Sequence number": 5036428, "Fwd thread id": 1, "Ev Idx": 1544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787620.272, "dur": 9.607, + "args": { + "External id": 458762,"Sequence number": 5036428, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1545 + } + }, + { + "ph": "f", "id": 162, "pid": 2338707, "tid": 2379456, "ts": 6267450787620.272, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450787621.658, "dur": 8.004, + "args": { + "External id": 458763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450787625.351, "dur": 3.450, + "args": { + "External id": 458764,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787627.763, "dur": 0.879, + "args": { + "External id": 458765,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450787640.272, "dur": 12.003, + "args": { + "External id": 458766,"Record function id": 0, "Ev Idx": 1549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450787643.012, "dur": 8.282, + "args": { + "External id": 458767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450787646.136, "dur": 4.623, + "args": { + "External id": 458768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450787647.408, "dur": 3.263, + "args": { + "External id": 458769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787657.041, "dur": 7.868, + "args": { + "External id": 458770,"Record function id": 0, "Sequence number": 5036427, "Fwd thread id": 1, "Ev Idx": 1553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787659.026, "dur": 3.700, + "args": { + "External id": 458771,"Sequence number": 5036427, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1554 + } + }, + { + "ph": "f", "id": 163, "pid": 2338707, "tid": 2379456, "ts": 6267450787659.026, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450787660.542, "dur": 1.997, + "args": { + "External id": 458772,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450787661.333, "dur": 1.061, + "args": { + "External id": 458773,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787671.312, "dur": 127.231, + "args": { + "External id": 458774,"Record function id": 0, "Sequence number": 5036426, "Fwd thread id": 1, "Ev Idx": 1557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787672.463, "dur": 115.610, + "args": { + "External id": 458775,"Sequence number": 5036426, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1558 + } + }, + { + "ph": "f", "id": 164, "pid": 2338707, "tid": 2379456, "ts": 6267450787672.463, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450787676.021, "dur": 7.142, + "args": { + "External id": 458776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450787676.609, "dur": 5.926, + "args": { + "External id": 458777,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787681.683, "dur": 0.707, + "args": { + "External id": 458778,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450787684.154, "dur": 40.503, + "args": { + "External id": 458779,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450787728.964, "dur": 6.129, + "args": { + "External id": 458780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450787729.586, "dur": 4.820, + "args": { + "External id": 458781,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787731.566, "dur": 2.639, + "args": { + "External id": 458782,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450787736.599, "dur": 5.924, + "args": { + "External id": 458783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450787737.439, "dur": 4.480, + "args": { + "External id": 458784,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787739.113, "dur": 2.702, + "args": { + "External id": 458785,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450787745.522, "dur": 41.439, + "args": { + "External id": 458786,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787806.629, "dur": 39.125, + "args": { + "External id": 458787,"Record function id": 0, "Sequence number": 5036425, "Fwd thread id": 1, "Ev Idx": 1570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787807.809, "dur": 4.244, + "args": { + "External id": 458788,"Sequence number": 5036425, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1571 + } + }, + { + "ph": "f", "id": 165, "pid": 2338707, "tid": 2379456, "ts": 6267450787807.809, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450787809.513, "dur": 2.343, + "args": { + "External id": 458789,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450787810.546, "dur": 1.124, + "args": { + "External id": 458790,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267450787815.833, "dur": 26.715, + "args": { + "External id": 458791,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787850.493, "dur": 9.580, + "args": { + "External id": 458792,"Record function id": 0, "Sequence number": 5036424, "Fwd thread id": 1, "Ev Idx": 1575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787851.634, "dur": 5.432, + "args": { + "External id": 458793,"Sequence number": 5036424, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1576 + } + }, + { + "ph": "f", "id": 166, "pid": 2338707, "tid": 2379456, "ts": 6267450787851.634, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450787852.442, "dur": 4.353, + "args": { + "External id": 458794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450787853.167, "dur": 2.973, + "args": { + "External id": 458795,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787855.113, "dur": 0.888, + "args": { + "External id": 458796,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450787864.925, "dur": 9.801, + "args": { + "External id": 458797,"Record function id": 0, "Ev Idx": 1580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450787866.773, "dur": 7.333, + "args": { + "External id": 458798,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450787868.082, "dur": 5.439, + "args": { + "External id": 458799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450787871.906, "dur": 1.449, + "args": { + "External id": 458800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787878.693, "dur": 8.033, + "args": { + "External id": 458801,"Record function id": 0, "Sequence number": 5036423, "Fwd thread id": 1, "Ev Idx": 1584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787880.650, "dur": 3.854, + "args": { + "External id": 458802,"Sequence number": 5036423, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1585 + } + }, + { + "ph": "f", "id": 167, "pid": 2338707, "tid": 2379456, "ts": 6267450787880.650, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450787882.443, "dur": 1.870, + "args": { + "External id": 458803,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450787883.333, "dur": 0.853, + "args": { + "External id": 458804,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787890.641, "dur": 118.481, + "args": { + "External id": 458805,"Record function id": 0, "Sequence number": 5036422, "Fwd thread id": 1, "Ev Idx": 1588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450787891.651, "dur": 107.546, + "args": { + "External id": 458806,"Sequence number": 5036422, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1589 + } + }, + { + "ph": "f", "id": 168, "pid": 2338707, "tid": 2379456, "ts": 6267450787891.651, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450787894.994, "dur": 5.534, + "args": { + "External id": 458807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450787897.884, "dur": 2.072, + "args": { + "External id": 458808,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787899.172, "dur": 0.641, + "args": { + "External id": 458809,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450787901.186, "dur": 43.414, + "args": { + "External id": 458810,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450787945.976, "dur": 4.530, + "args": { + "External id": 458811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450787946.503, "dur": 3.315, + "args": { + "External id": 458812,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787948.425, "dur": 1.242, + "args": { + "External id": 458813,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450787955.240, "dur": 4.450, + "args": { + "External id": 458814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450787956.464, "dur": 2.490, + "args": { + "External id": 458815,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450787958.249, "dur": 0.625, + "args": { + "External id": 458816,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450787960.245, "dur": 38.067, + "args": { + "External id": 458817,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450788014.511, "dur": 32.916, + "args": { + "External id": 458818,"Record function id": 0, "Sequence number": 5036421, "Fwd thread id": 1, "Ev Idx": 1601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450788015.902, "dur": 7.082, + "args": { + "External id": 458819,"Sequence number": 5036421, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1602 + } + }, + { + "ph": "f", "id": 169, "pid": 2338707, "tid": 2379456, "ts": 6267450788015.902, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450788020.547, "dur": 2.248, + "args": { + "External id": 458820,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450788021.631, "dur": 1.049, + "args": { + "External id": 458821,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450788025.784, "dur": 18.128, + "args": { + "External id": 458822,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450788051.767, "dur": 50.923, + "args": { + "External id": 458823,"Record function id": 0, "Sequence number": 5036420, "Fwd thread id": 1, "Ev Idx": 1606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267450788052.842, "dur": 46.144, + "args": { + "External id": 458824,"Sequence number": 5036420, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1607 + } + }, + { + "ph": "f", "id": 170, "pid": 2338707, "tid": 2379456, "ts": 6267450788052.842, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267450788053.636, "dur": 45.064, + "args": { + "External id": 458825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267450788054.704, "dur": 42.885, + "args": { + "External id": 458826,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450788095.926, "dur": 1.155, + "args": { + "External id": 458827,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450788109.743, "dur": 8.270, + "args": { + "External id": 458828,"Record function id": 0, "Ev Idx": 1611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450788112.294, "dur": 5.084, + "args": { + "External id": 458829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450788114.137, "dur": 2.806, + "args": { + "External id": 458830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450788115.259, "dur": 1.592, + "args": { + "External id": 458831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450788123.494, "dur": 446.481, + "args": { + "External id": 458832,"Record function id": 0, "Sequence number": 5036419, "Fwd thread id": 1, "Ev Idx": 1615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450788125.607, "dur": 403.848, + "args": { + "External id": 458833,"Sequence number": 5036419, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1616 + } + }, + { + "ph": "f", "id": 171, "pid": 2338707, "tid": 2379456, "ts": 6267450788125.607, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450788171.980, "dur": 2.734, + "args": { + "External id": 458834,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450788172.956, "dur": 1.600, + "args": { + "External id": 458835,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450788193.847, "dur": 8.239, + "args": { + "External id": 458836,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450788214.253, "dur": 2.461, + "args": { + "External id": 458837,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450788398.388, "dur": 1.913, + "args": { + "External id": 458838,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450788405.691, "dur": 44.835, + "args": { + "External id": 458839,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450788419.907, "dur": 1.174, + "args": { + "External id": 458840,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450788457.324, "dur": 42.010, + "args": { + "External id": 458841,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450788462.073, "dur": 36.946, + "args": { + "External id": 458842,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450788467.607, "dur": 5.387, + "args": { + "External id": 458843,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450788475.107, "dur": 23.264, + "args": { + "External id": 458844,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267450788504.496, "dur": 3.159, + "args": { + "External id": 458845,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450788506.271, "dur": 1.224, + "args": { + "External id": 458846,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450788516.145, "dur": 5.106, + "args": { + "External id": 458847,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450788520.191, "dur": 0.944, + "args": { + "External id": 458848,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267450788544.926, "dur": 19.416, + "args": { + "External id": 458849,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450788581.033, "dur": 9.367, + "args": { + "External id": 458850,"Record function id": 0, "Ev Idx": 1633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450788584.081, "dur": 5.452, + "args": { + "External id": 458851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450788585.687, "dur": 2.660, + "args": { + "External id": 458852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450788586.464, "dur": 1.749, + "args": { + "External id": 458853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450788621.370, "dur": 3503.848, + "args": { + "External id": 458854,"Record function id": 0, "Ev Idx": 1637 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2338707, "tid": 2379456, + "ts": 6267450788663.743, "dur": 1190.029, + "args": { + "External id": 458855,"Record function id": 0, "Ev Idx": 1638 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2338707, "tid": 2379456, + "ts": 6267450788694.331, "dur": 1149.468, + "args": { + "External id": 458856,"Record function id": 0, "Ev Idx": 1639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338707, "tid": 2379456, + "ts": 6267450788713.919, "dur": 1110.956, + "args": { + "External id": 458857,"Record function id": 0, "Ev Idx": 1640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450788819.312, "dur": 8.467, + "args": { + "External id": 458858,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267450788847.052, "dur": 40.919, + "args": { + "External id": 458859,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450788852.879, "dur": 1.867, + "args": { + "External id": 458860,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450788858.667, "dur": 3.046, + "args": { + "External id": 458861,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450788863.169, "dur": 0.587, + "args": { + "External id": 458862,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450788865.821, "dur": 2.009, + "args": { + "External id": 458863,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450788869.675, "dur": 0.657, + "args": { + "External id": 458864,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450788872.316, "dur": 0.530, + "args": { + "External id": 458865,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450788876.602, "dur": 0.381, + "args": { + "External id": 458866,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450788878.951, "dur": 0.415, + "args": { + "External id": 458867,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450788881.073, "dur": 0.382, + "args": { + "External id": 458868,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450788902.963, "dur": 59.539, + "args": { + "External id": 458869,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267450789015.132, "dur": 204.295, + "args": { + "External id": 458870,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450789030.495, "dur": 6.838, + "args": { + "External id": 458871,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267450789045.236, "dur": 57.564, + "args": { + "External id": 458872,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267450789050.144, "dur": 52.115, + "args": { + "External id": 458873,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450789054.306, "dur": 44.765, + "args": { + "External id": 458874,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267450789114.407, "dur": 36.547, + "args": { + "External id": 458875,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450789117.292, "dur": 0.651, + "args": { + "External id": 458876,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450789119.787, "dur": 0.528, + "args": { + "External id": 458877,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450789122.011, "dur": 0.461, + "args": { + "External id": 458878,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450789126.483, "dur": 0.316, + "args": { + "External id": 458879,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450789128.666, "dur": 0.739, + "args": { + "External id": 458880,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450789131.136, "dur": 2.154, + "args": { + "External id": 458881,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450789135.035, "dur": 2.882, + "args": { + "External id": 458882,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450789139.717, "dur": 0.477, + "args": { + "External id": 458883,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450789143.443, "dur": 0.340, + "args": { + "External id": 458884,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450789166.651, "dur": 41.739, + "args": { + "External id": 458885,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267450789299.117, "dur": 392.249, + "args": { + "External id": 458886,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267450789344.185, "dur": 340.187, + "args": { + "External id": 458887,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1670, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267450789356.803, "dur": 319.289, + "args": { + "External id": 458888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267450789722.104, "dur": 3.441, + "args": { + "External id": 458889,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1672, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450789863.461, "dur": 2232.082, + "args": { + "External id": 458890,"Sequence number": 5036418, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1673 + } + }, + { + "ph": "f", "id": 172, "pid": 2338707, "tid": 2379456, "ts": 6267450789863.461, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450790016.127, "dur": 184.774, + "args": { + "External id": 458891,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267450790260.608, "dur": 51.979, + "args": { + "External id": 458892,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267450790338.768, "dur": 67.142, + "args": { + "External id": 458893,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450790418.195, "dur": 37.844, + "args": { + "External id": 458894,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450790463.292, "dur": 40.812, + "args": { + "External id": 458895,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450790514.700, "dur": 32.475, + "args": { + "External id": 458896,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450790557.137, "dur": 53.310, + "args": { + "External id": 458897,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267450790649.520, "dur": 34.077, + "args": { + "External id": 458898,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267450790709.934, "dur": 36.514, + "args": { + "External id": 458899,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267450790773.182, "dur": 23.538, + "args": { + "External id": 458900,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267450790817.584, "dur": 18.181, + "args": { + "External id": 458901,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450790847.997, "dur": 47.894, + "args": { + "External id": 458902,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450790900.279, "dur": 38.105, + "args": { + "External id": 458903,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267450790977.982, "dur": 362.663, + "args": { + "External id": 458904,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450791115.367, "dur": 12.009, + "args": { + "External id": 458905,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450791130.343, "dur": 4.566, + "args": { + "External id": 458906,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450791136.206, "dur": 2.309, + "args": { + "External id": 458907,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450791139.902, "dur": 2.096, + "args": { + "External id": 458908,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450791201.994, "dur": 8.468, + "args": { + "External id": 458909,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450791204.506, "dur": 5.696, + "args": { + "External id": 458910,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450791212.940, "dur": 40.928, + "args": { + "External id": 458911,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450791220.528, "dur": 2.254, + "args": { + "External id": 458912,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267450791255.622, "dur": 5.636, + "args": { + "External id": 458913,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450791260.303, "dur": 0.863, + "args": { + "External id": 458914,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267450791262.478, "dur": 19.243, + "args": { + "External id": 458915,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450791265.474, "dur": 0.520, + "args": { + "External id": 458916,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267450791387.860, "dur": 35.147, + "args": { + "External id": 458917,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267450791443.642, "dur": 20.137, + "args": { + "External id": 458918,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450791472.630, "dur": 56.112, + "args": { + "External id": 458919,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450791539.878, "dur": 45.790, + "args": { + "External id": 458920,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450791617.176, "dur": 29.824, + "args": { + "External id": 458921,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450791655.044, "dur": 36.506, + "args": { + "External id": 458922,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450791700.458, "dur": 32.202, + "args": { + "External id": 458923,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267450791743.443, "dur": 36.312, + "args": { + "External id": 458924,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267450791809.118, "dur": 31.174, + "args": { + "External id": 458925,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 1708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267450791863.324, "dur": 31.493, + "args": { + "External id": 458926,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267450791914.169, "dur": 22.194, + "args": { + "External id": 458927,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267450791959.524, "dur": 18.603, + "args": { + "External id": 458928,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267450791996.584, "dur": 22.236, + "args": { + "External id": 458929,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 1712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792152.687, "dur": 22.844, + "args": { + "External id": 458930,"Record function id": 0, "Ev Idx": 1713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792157.105, "dur": 17.140, + "args": { + "External id": 458931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792162.244, "dur": 10.967, + "args": { + "External id": 458932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792164.418, "dur": 8.681, + "args": { + "External id": 458933,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792180.379, "dur": 6.198, + "args": { + "External id": 458934,"Record function id": 0, "Ev Idx": 1717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792182.394, "dur": 3.559, + "args": { + "External id": 458935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792183.311, "dur": 1.860, + "args": { + "External id": 458936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792183.836, "dur": 1.178, + "args": { + "External id": 458937,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792190.643, "dur": 5.556, + "args": { + "External id": 458938,"Record function id": 0, "Ev Idx": 1721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792192.265, "dur": 3.415, + "args": { + "External id": 458939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792193.275, "dur": 1.653, + "args": { + "External id": 458940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792194.123, "dur": 0.659, + "args": { + "External id": 458941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792200.192, "dur": 5.036, + "args": { + "External id": 458942,"Record function id": 0, "Ev Idx": 1725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792201.775, "dur": 2.984, + "args": { + "External id": 458943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792202.964, "dur": 1.261, + "args": { + "External id": 458944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792203.300, "dur": 0.842, + "args": { + "External id": 458945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792209.265, "dur": 14.449, + "args": { + "External id": 458946,"Record function id": 0, "Ev Idx": 1729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792210.729, "dur": 12.503, + "args": { + "External id": 458947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792220.717, "dur": 1.614, + "args": { + "External id": 458948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792221.380, "dur": 0.872, + "args": { + "External id": 458949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792227.610, "dur": 7.217, + "args": { + "External id": 458950,"Record function id": 0, "Ev Idx": 1733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792229.213, "dur": 5.102, + "args": { + "External id": 458951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792230.075, "dur": 3.729, + "args": { + "External id": 458952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792232.965, "dur": 0.702, + "args": { + "External id": 458953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792240.440, "dur": 5.198, + "args": { + "External id": 458954,"Record function id": 0, "Ev Idx": 1737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792241.993, "dur": 3.172, + "args": { + "External id": 458955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792243.115, "dur": 1.543, + "args": { + "External id": 458956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792243.879, "dur": 0.680, + "args": { + "External id": 458957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792249.545, "dur": 7.860, + "args": { + "External id": 458958,"Record function id": 0, "Ev Idx": 1741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792251.259, "dur": 5.637, + "args": { + "External id": 458959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792252.137, "dur": 4.200, + "args": { + "External id": 458960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792252.480, "dur": 3.768, + "args": { + "External id": 458961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792261.217, "dur": 5.387, + "args": { + "External id": 458962,"Record function id": 0, "Ev Idx": 1745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267450792263.198, "dur": 2.934, + "args": { + "External id": 458963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792263.949, "dur": 1.594, + "args": { + "External id": 458964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267450792264.625, "dur": 0.839, + "args": { + "External id": 458965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450792271.990, "dur": 279481.217, + "args": { + "External id": 458966,"Record function id": 0, "Sequence number": 5036417, "Fwd thread id": 1, "Ev Idx": 1749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267450792273.761, "dur": 279468.266, + "args": { + "External id": 458967,"Sequence number": 5036417, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1750 + } + }, + { + "ph": "f", "id": 173, "pid": 2338707, "tid": 2379456, "ts": 6267450792273.761, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2338707, "tid": 2379456, + "ts": 6267450792313.975, "dur": 52.758, + "args": { + "External id": 458968,"Record function id": 0, "Ev Idx": 1751 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2338707, "tid": 2379456, + "ts": 6267450792376.034, "dur": 100.908, + "args": { + "External id": 458969,"Record function id": 0, "Ev Idx": 1752 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2338707, "tid": 2379456, + "ts": 6267450792484.447, "dur": 279247.528, + "args": { + "External id": 458970,"Record function id": 0, "Ev Idx": 1753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450792552.945, "dur": 10.244, + "args": { + "External id": 458971,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267450792575.533, "dur": 5.481, + "args": { + "External id": 458972,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267450792624.670, "dur": 278066.620, + "args": { + "External id": 458973,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267450792643.785, "dur": 278030.188, + "args": { + "External id": 458974,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267450792788.811, "dur": 6.657, + "args": { + "External id": 458975,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267450792823.516, "dur": 277790.210, + "args": { + "External id": 458976,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 1759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267450792827.015, "dur": 277785.319, + "args": { + "External id": 458977,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 1760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267450792832.592, "dur": 17.088, + "args": { + "External id": 458978,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267450792852.414, "dur": 277753.299, + "args": { + "External id": 458979,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 1762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451070826.025, "dur": 22.065, + "args": { + "External id": 458980,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 1763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451070837.036, "dur": 10.538, + "args": { + "External id": 458981,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451070891.764, "dur": 403.221, + "args": { + "External id": 458982,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 1765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451070930.807, "dur": 357.497, + "args": { + "External id": 458983,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1766, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451070944.790, "dur": 336.079, + "args": { + "External id": 458984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 1767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451071325.331, "dur": 2.889, + "args": { + "External id": 458985,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1768, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451071404.886, "dur": 7.823, + "args": { + "External id": 458986,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451071489.559, "dur": 1.747, + "args": { + "External id": 458987,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451071514.802, "dur": 1.809, + "args": { + "External id": 458988,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451071534.350, "dur": 0.837, + "args": { + "External id": 458989,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451071552.984, "dur": 0.976, + "args": { + "External id": 458990,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451071572.556, "dur": 0.847, + "args": { + "External id": 458991,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451071590.630, "dur": 16.412, + "args": { + "External id": 458992,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451071629.044, "dur": 3.962, + "args": { + "External id": 458993,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451071647.206, "dur": 1.055, + "args": { + "External id": 458994,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451071773.021, "dur": 3674.453, + "args": { + "External id": 458995,"Record function id": 0, "Ev Idx": 1778 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2338707, "tid": 2379456, + "ts": 6267451071799.143, "dur": 1426.036, + "args": { + "External id": 458996,"Record function id": 0, "Ev Idx": 1779 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338707, "tid": 2379456, + "ts": 6267451071819.002, "dur": 481.559, + "args": { + "External id": 458997,"Record function id": 0, "Ev Idx": 1780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451071932.326, "dur": 4.731, + "args": { + "External id": 458998,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451071941.050, "dur": 1.528, + "args": { + "External id": 458999,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451071944.656, "dur": 1.275, + "args": { + "External id": 459000,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451071948.181, "dur": 1.330, + "args": { + "External id": 459001,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451071954.271, "dur": 1.089, + "args": { + "External id": 459002,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451071957.352, "dur": 1.072, + "args": { + "External id": 459003,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451071960.237, "dur": 3.127, + "args": { + "External id": 459004,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451071965.193, "dur": 3.156, + "args": { + "External id": 459005,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451071972.406, "dur": 1.191, + "args": { + "External id": 459006,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451071975.455, "dur": 0.942, + "args": { + "External id": 459007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451071999.566, "dur": 257.332, + "args": { + "External id": 459008,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451072019.601, "dur": 230.537, + "args": { + "External id": 459009,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451072096.480, "dur": 17.736, + "args": { + "External id": 459010,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451072121.809, "dur": 91.422, + "args": { + "External id": 459011,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 1794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451072124.992, "dur": 87.765, + "args": { + "External id": 459012,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 1795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072132.387, "dur": 9.046, + "args": { + "External id": 459013,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451072146.377, "dur": 65.571, + "args": { + "External id": 459014,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 1797 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2338707, "tid": 2379456, + "ts": 6267451072416.829, "dur": 798.323, + "args": { + "External id": 459015,"Record function id": 0, "Ev Idx": 1798 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338707, "tid": 2379456, + "ts": 6267451072439.366, "dur": 759.768, + "args": { + "External id": 459016,"Record function id": 0, "Ev Idx": 1799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451072516.859, "dur": 5.906, + "args": { + "External id": 459017,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451072540.676, "dur": 40.927, + "args": { + "External id": 459018,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072547.072, "dur": 1.927, + "args": { + "External id": 459019,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072551.252, "dur": 2.253, + "args": { + "External id": 459020,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072555.278, "dur": 2.820, + "args": { + "External id": 459021,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072560.060, "dur": 0.561, + "args": { + "External id": 459022,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072563.934, "dur": 0.396, + "args": { + "External id": 459023,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072565.725, "dur": 0.421, + "args": { + "External id": 459024,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072568.166, "dur": 0.478, + "args": { + "External id": 459025,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072571.457, "dur": 0.386, + "args": { + "External id": 459026,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072573.494, "dur": 0.696, + "args": { + "External id": 459027,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451072611.310, "dur": 53.222, + "args": { + "External id": 459028,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451072705.594, "dur": 150.509, + "args": { + "External id": 459029,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451072718.424, "dur": 4.723, + "args": { + "External id": 459030,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451072730.073, "dur": 22.266, + "args": { + "External id": 459031,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451072742.249, "dur": 9.556, + "args": { + "External id": 459032,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072746.791, "dur": 3.168, + "args": { + "External id": 459033,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451072760.940, "dur": 35.065, + "args": { + "External id": 459034,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072763.899, "dur": 0.483, + "args": { + "External id": 459035,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072767.767, "dur": 0.612, + "args": { + "External id": 459036,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072770.099, "dur": 0.459, + "args": { + "External id": 459037,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072772.040, "dur": 1.877, + "args": { + "External id": 459038,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072775.886, "dur": 0.519, + "args": { + "External id": 459039,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072778.125, "dur": 0.354, + "args": { + "External id": 459040,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072782.279, "dur": 0.449, + "args": { + "External id": 459041,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072784.263, "dur": 2.790, + "args": { + "External id": 459042,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451072788.791, "dur": 0.323, + "args": { + "External id": 459043,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451072811.706, "dur": 34.437, + "args": { + "External id": 459044,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451072911.966, "dur": 192.592, + "args": { + "External id": 459045,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451072949.079, "dur": 150.361, + "args": { + "External id": 459046,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1829, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451072960.633, "dur": 130.540, + "args": { + "External id": 459047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451073129.061, "dur": 2.989, + "args": { + "External id": 459048,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1831, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451073235.390, "dur": 2186.103, + "args": { + "External id": 459049,"Sequence number": 5036416, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1832 + } + }, + { + "ph": "f", "id": 174, "pid": 2338707, "tid": 2379456, "ts": 6267451073235.390, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451073372.676, "dur": 135.278, + "args": { + "External id": 459050,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451073558.890, "dur": 71.590, + "args": { + "External id": 459051,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451073659.258, "dur": 67.101, + "args": { + "External id": 459052,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451073739.099, "dur": 37.506, + "args": { + "External id": 459053,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451073786.499, "dur": 37.953, + "args": { + "External id": 459054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451073831.812, "dur": 32.328, + "args": { + "External id": 459055,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451073876.438, "dur": 33.876, + "args": { + "External id": 459056,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451073941.746, "dur": 30.316, + "args": { + "External id": 459057,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451073996.338, "dur": 36.718, + "args": { + "External id": 459058,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451074101.218, "dur": 28.314, + "args": { + "External id": 459059,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451074153.723, "dur": 21.321, + "args": { + "External id": 459060,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451074186.863, "dur": 51.189, + "args": { + "External id": 459061,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451074243.192, "dur": 39.866, + "args": { + "External id": 459062,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451074335.265, "dur": 344.260, + "args": { + "External id": 459063,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451074445.118, "dur": 8.656, + "args": { + "External id": 459064,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451074456.545, "dur": 2.951, + "args": { + "External id": 459065,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451074460.880, "dur": 4.331, + "args": { + "External id": 459066,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451074466.520, "dur": 3.333, + "args": { + "External id": 459067,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451074530.937, "dur": 6.088, + "args": { + "External id": 459068,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451074533.273, "dur": 3.492, + "args": { + "External id": 459069,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451074539.469, "dur": 41.329, + "args": { + "External id": 459070,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451074546.277, "dur": 4.112, + "args": { + "External id": 459071,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451074582.835, "dur": 2.254, + "args": { + "External id": 459072,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451074584.112, "dur": 0.874, + "args": { + "External id": 459073,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451074586.132, "dur": 40.492, + "args": { + "External id": 459074,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451074590.499, "dur": 0.480, + "args": { + "External id": 459075,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451074729.854, "dur": 36.796, + "args": { + "External id": 459076,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451074787.852, "dur": 20.861, + "args": { + "External id": 459077,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451074817.944, "dur": 58.688, + "args": { + "External id": 459078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451074884.640, "dur": 48.175, + "args": { + "External id": 459079,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451074944.936, "dur": 26.311, + "args": { + "External id": 459080,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451074976.837, "dur": 37.946, + "args": { + "External id": 459081,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451075023.137, "dur": 69.639, + "args": { + "External id": 459082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451075106.879, "dur": 42.803, + "args": { + "External id": 459083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451075180.282, "dur": 35.544, + "args": { + "External id": 459084,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 1867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451075236.018, "dur": 31.079, + "args": { + "External id": 459085,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451075286.281, "dur": 24.235, + "args": { + "External id": 459086,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451075329.379, "dur": 18.604, + "args": { + "External id": 459087,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451075364.493, "dur": 21.026, + "args": { + "External id": 459088,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 1871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075474.557, "dur": 18.159, + "args": { + "External id": 459089,"Record function id": 0, "Ev Idx": 1872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075478.619, "dur": 12.880, + "args": { + "External id": 459090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075483.857, "dur": 6.250, + "args": { + "External id": 459091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075485.519, "dur": 4.484, + "args": { + "External id": 459092,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075497.953, "dur": 9.534, + "args": { + "External id": 459093,"Record function id": 0, "Ev Idx": 1876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075499.887, "dur": 7.060, + "args": { + "External id": 459094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075501.065, "dur": 5.262, + "args": { + "External id": 459095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075501.974, "dur": 4.240, + "args": { + "External id": 459096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075511.713, "dur": 6.381, + "args": { + "External id": 459097,"Record function id": 0, "Ev Idx": 1880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075513.726, "dur": 3.780, + "args": { + "External id": 459098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075514.882, "dur": 2.090, + "args": { + "External id": 459099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075516.011, "dur": 0.848, + "args": { + "External id": 459100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075522.127, "dur": 4.733, + "args": { + "External id": 459101,"Record function id": 0, "Ev Idx": 1884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075523.731, "dur": 2.613, + "args": { + "External id": 459102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075524.640, "dur": 1.206, + "args": { + "External id": 459103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075525.011, "dur": 0.733, + "args": { + "External id": 459104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075530.661, "dur": 4.694, + "args": { + "External id": 459105,"Record function id": 0, "Ev Idx": 1888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075532.131, "dur": 2.688, + "args": { + "External id": 459106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075532.731, "dur": 1.632, + "args": { + "External id": 459107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075533.206, "dur": 1.077, + "args": { + "External id": 459108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075539.211, "dur": 4.876, + "args": { + "External id": 459109,"Record function id": 0, "Ev Idx": 1892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075540.725, "dur": 2.821, + "args": { + "External id": 459110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075541.468, "dur": 1.545, + "args": { + "External id": 459111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075542.087, "dur": 0.852, + "args": { + "External id": 459112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075548.093, "dur": 15.248, + "args": { + "External id": 459113,"Record function id": 0, "Ev Idx": 1896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075549.979, "dur": 12.841, + "args": { + "External id": 459114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075550.753, "dur": 11.587, + "args": { + "External id": 459115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075561.201, "dur": 1.030, + "args": { + "External id": 459116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075567.249, "dur": 4.824, + "args": { + "External id": 459117,"Record function id": 0, "Ev Idx": 1900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075568.633, "dur": 2.916, + "args": { + "External id": 459118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075569.373, "dur": 1.499, + "args": { + "External id": 459119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075569.913, "dur": 0.857, + "args": { + "External id": 459120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075575.844, "dur": 7.512, + "args": { + "External id": 459121,"Record function id": 0, "Ev Idx": 1904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451075577.785, "dur": 5.074, + "args": { + "External id": 459122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075578.715, "dur": 3.481, + "args": { + "External id": 459123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451075579.266, "dur": 2.855, + "args": { + "External id": 459124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451075587.906, "dur": 77681.055, + "args": { + "External id": 459125,"Record function id": 0, "Sequence number": 5036415, "Fwd thread id": 1, "Ev Idx": 1908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451075589.353, "dur": 77667.271, + "args": { + "External id": 459126,"Sequence number": 5036415, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1909 + } + }, + { + "ph": "f", "id": 175, "pid": 2338707, "tid": 2379456, "ts": 6267451075589.353, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2338707, "tid": 2379456, + "ts": 6267451075648.318, "dur": 48.485, + "args": { + "External id": 459127,"Record function id": 0, "Ev Idx": 1910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2338707, "tid": 2379456, + "ts": 6267451075706.713, "dur": 81.449, + "args": { + "External id": 459128,"Record function id": 0, "Ev Idx": 1911 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2338707, "tid": 2379456, + "ts": 6267451075795.609, "dur": 77450.585, + "args": { + "External id": 459129,"Record function id": 0, "Ev Idx": 1912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451075908.208, "dur": 9.374, + "args": { + "External id": 459130,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451075930.603, "dur": 6.192, + "args": { + "External id": 459131,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451075957.586, "dur": 76158.607, + "args": { + "External id": 459132,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451075977.504, "dur": 76122.607, + "args": { + "External id": 459133,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451076177.227, "dur": 28.822, + "args": { + "External id": 459134,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451076232.336, "dur": 75787.801, + "args": { + "External id": 459135,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 1918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451076235.674, "dur": 75783.298, + "args": { + "External id": 459136,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 1919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451076241.247, "dur": 18.349, + "args": { + "External id": 459137,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451076262.045, "dur": 75751.508, + "args": { + "External id": 459138,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 1921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451152241.396, "dur": 13.532, + "args": { + "External id": 459139,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 1922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451152245.640, "dur": 8.717, + "args": { + "External id": 459140,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451152291.081, "dur": 515.519, + "args": { + "External id": 459141,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 1924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451152330.706, "dur": 468.490, + "args": { + "External id": 459142,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1925, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451152346.561, "dur": 445.090, + "args": { + "External id": 459143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 1926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451152840.997, "dur": 3.276, + "args": { + "External id": 459144,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1927, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451152923.392, "dur": 8.141, + "args": { + "External id": 459145,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451152991.068, "dur": 3.867, + "args": { + "External id": 459146,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451153016.575, "dur": 1.364, + "args": { + "External id": 459147,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451153034.956, "dur": 1.111, + "args": { + "External id": 459148,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451153089.517, "dur": 2.115, + "args": { + "External id": 459149,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451153113.390, "dur": 3.203, + "args": { + "External id": 459150,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451153130.906, "dur": 1.362, + "args": { + "External id": 459151,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451153145.752, "dur": 3.047, + "args": { + "External id": 459152,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451153165.678, "dur": 1.027, + "args": { + "External id": 459153,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451153289.260, "dur": 3666.416, + "args": { + "External id": 459154,"Record function id": 0, "Ev Idx": 1937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2338707, "tid": 2379456, + "ts": 6267451153313.819, "dur": 1417.980, + "args": { + "External id": 459155,"Record function id": 0, "Ev Idx": 1938 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338707, "tid": 2379456, + "ts": 6267451153333.358, "dur": 475.733, + "args": { + "External id": 459156,"Record function id": 0, "Ev Idx": 1939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451153450.581, "dur": 7.625, + "args": { + "External id": 459157,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451153462.050, "dur": 1.232, + "args": { + "External id": 459158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451153465.318, "dur": 1.405, + "args": { + "External id": 459159,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451153469.018, "dur": 1.160, + "args": { + "External id": 459160,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451153471.849, "dur": 1.180, + "args": { + "External id": 459161,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451153474.858, "dur": 1.059, + "args": { + "External id": 459162,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451153477.700, "dur": 3.015, + "args": { + "External id": 459163,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451153485.173, "dur": 1.174, + "args": { + "External id": 459164,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451153487.865, "dur": 3.138, + "args": { + "External id": 459165,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451153492.671, "dur": 0.998, + "args": { + "External id": 459166,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451153529.686, "dur": 236.294, + "args": { + "External id": 459167,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451153551.649, "dur": 207.695, + "args": { + "External id": 459168,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451153586.502, "dur": 36.680, + "args": { + "External id": 459169,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451153633.175, "dur": 89.542, + "args": { + "External id": 459170,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 1953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451153636.635, "dur": 85.551, + "args": { + "External id": 459171,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 1954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451153642.020, "dur": 9.076, + "args": { + "External id": 459172,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451153653.197, "dur": 67.949, + "args": { + "External id": 459173,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 1956 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2338707, "tid": 2379456, + "ts": 6267451153924.722, "dur": 796.880, + "args": { + "External id": 459174,"Record function id": 0, "Ev Idx": 1957 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338707, "tid": 2379456, + "ts": 6267451153945.892, "dur": 760.003, + "args": { + "External id": 459175,"Record function id": 0, "Ev Idx": 1958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451154023.758, "dur": 6.952, + "args": { + "External id": 459176,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451154049.154, "dur": 87.470, + "args": { + "External id": 459177,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154095.019, "dur": 2.381, + "args": { + "External id": 459178,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154100.578, "dur": 2.389, + "args": { + "External id": 459179,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154104.849, "dur": 0.609, + "args": { + "External id": 459180,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154107.133, "dur": 2.683, + "args": { + "External id": 459181,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154117.484, "dur": 0.424, + "args": { + "External id": 459182,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154119.733, "dur": 0.471, + "args": { + "External id": 459183,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154121.855, "dur": 0.415, + "args": { + "External id": 459184,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154125.529, "dur": 0.748, + "args": { + "External id": 459185,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154127.677, "dur": 0.683, + "args": { + "External id": 459186,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451154150.307, "dur": 55.139, + "args": { + "External id": 459187,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451154246.582, "dur": 139.789, + "args": { + "External id": 459188,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451154260.213, "dur": 5.528, + "args": { + "External id": 459189,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451154272.313, "dur": 12.271, + "args": { + "External id": 459190,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451154277.682, "dur": 6.371, + "args": { + "External id": 459191,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154281.924, "dur": 0.625, + "args": { + "External id": 459192,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451154292.749, "dur": 38.543, + "args": { + "External id": 459193,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154295.500, "dur": 3.343, + "args": { + "External id": 459194,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154302.405, "dur": 0.617, + "args": { + "External id": 459195,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154304.642, "dur": 0.711, + "args": { + "External id": 459196,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154307.164, "dur": 2.388, + "args": { + "External id": 459197,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154311.069, "dur": 0.394, + "args": { + "External id": 459198,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154313.111, "dur": 0.444, + "args": { + "External id": 459199,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154317.334, "dur": 0.352, + "args": { + "External id": 459200,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154319.285, "dur": 0.367, + "args": { + "External id": 459201,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451154321.511, "dur": 2.663, + "args": { + "External id": 459202,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451154343.520, "dur": 32.850, + "args": { + "External id": 459203,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451154445.382, "dur": 167.168, + "args": { + "External id": 459204,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451154483.441, "dur": 106.697, + "args": { + "External id": 459205,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1988, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451154495.318, "dur": 89.089, + "args": { + "External id": 459206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451154637.358, "dur": 3.235, + "args": { + "External id": 459207,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1990, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451154741.109, "dur": 2185.807, + "args": { + "External id": 459208,"Sequence number": 5036414, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1991 + } + }, + { + "ph": "f", "id": 176, "pid": 2338707, "tid": 2379456, "ts": 6267451154741.109, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451154876.554, "dur": 135.130, + "args": { + "External id": 459209,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451155103.205, "dur": 55.282, + "args": { + "External id": 459210,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451155186.962, "dur": 71.820, + "args": { + "External id": 459211,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451155271.908, "dur": 39.240, + "args": { + "External id": 459212,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451155318.943, "dur": 41.934, + "args": { + "External id": 459213,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451155370.375, "dur": 33.845, + "args": { + "External id": 459214,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451155415.835, "dur": 35.814, + "args": { + "External id": 459215,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451155483.887, "dur": 30.702, + "args": { + "External id": 459216,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451155536.702, "dur": 36.456, + "args": { + "External id": 459217,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451155620.870, "dur": 29.250, + "args": { + "External id": 459218,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451155668.902, "dur": 22.687, + "args": { + "External id": 459219,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451155702.897, "dur": 71.033, + "args": { + "External id": 459220,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451155785.863, "dur": 44.110, + "args": { + "External id": 459221,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451155874.567, "dur": 356.363, + "args": { + "External id": 459222,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451155979.937, "dur": 14.013, + "args": { + "External id": 459223,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451155996.484, "dur": 2.825, + "args": { + "External id": 459224,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451156000.671, "dur": 1.804, + "args": { + "External id": 459225,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451156003.696, "dur": 3.853, + "args": { + "External id": 459226,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451156098.500, "dur": 7.222, + "args": { + "External id": 459227,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451156101.368, "dur": 3.904, + "args": { + "External id": 459228,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451156108.197, "dur": 41.474, + "args": { + "External id": 459229,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451156114.729, "dur": 3.926, + "args": { + "External id": 459230,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451156151.788, "dur": 1.998, + "args": { + "External id": 459231,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451156152.943, "dur": 0.661, + "args": { + "External id": 459232,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451156155.183, "dur": 17.110, + "args": { + "External id": 459233,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451156157.701, "dur": 0.736, + "args": { + "External id": 459234,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451156279.827, "dur": 35.344, + "args": { + "External id": 459235,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451156337.032, "dur": 19.672, + "args": { + "External id": 459236,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451156366.131, "dur": 54.624, + "args": { + "External id": 459237,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451156428.179, "dur": 43.789, + "args": { + "External id": 459238,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451156483.813, "dur": 23.843, + "args": { + "External id": 459239,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451156514.328, "dur": 35.516, + "args": { + "External id": 459240,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451156557.915, "dur": 33.167, + "args": { + "External id": 459241,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451156617.775, "dur": 41.213, + "args": { + "External id": 459242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451156687.387, "dur": 33.283, + "args": { + "External id": 459243,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451156740.502, "dur": 31.108, + "args": { + "External id": 459244,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451156790.604, "dur": 22.836, + "args": { + "External id": 459245,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451156835.006, "dur": 18.994, + "args": { + "External id": 459246,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451156872.118, "dur": 19.969, + "args": { + "External id": 459247,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451156982.674, "dur": 18.588, + "args": { + "External id": 459248,"Record function id": 0, "Ev Idx": 2031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451156987.495, "dur": 12.805, + "args": { + "External id": 459249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451156992.460, "dur": 6.886, + "args": { + "External id": 459250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451156994.422, "dur": 4.804, + "args": { + "External id": 459251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157006.082, "dur": 5.478, + "args": { + "External id": 459252,"Record function id": 0, "Ev Idx": 2035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157008.136, "dur": 2.936, + "args": { + "External id": 459253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157008.976, "dur": 1.564, + "args": { + "External id": 459254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157009.551, "dur": 0.910, + "args": { + "External id": 459255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157015.692, "dur": 8.544, + "args": { + "External id": 459256,"Record function id": 0, "Ev Idx": 2039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157017.380, "dur": 6.322, + "args": { + "External id": 459257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157018.043, "dur": 5.092, + "args": { + "External id": 459258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157019.102, "dur": 3.925, + "args": { + "External id": 459259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157028.425, "dur": 4.322, + "args": { + "External id": 459260,"Record function id": 0, "Ev Idx": 2043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157029.920, "dur": 2.362, + "args": { + "External id": 459261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157030.532, "dur": 1.242, + "args": { + "External id": 459262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157030.854, "dur": 0.821, + "args": { + "External id": 459263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157036.619, "dur": 87.806, + "args": { + "External id": 459264,"Record function id": 0, "Ev Idx": 2047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157116.250, "dur": 6.652, + "args": { + "External id": 459265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157118.667, "dur": 2.744, + "args": { + "External id": 459266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157119.350, "dur": 1.743, + "args": { + "External id": 459267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157130.941, "dur": 14.017, + "args": { + "External id": 459268,"Record function id": 0, "Ev Idx": 2051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157139.540, "dur": 4.928, + "args": { + "External id": 459269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157140.224, "dur": 3.656, + "args": { + "External id": 459270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157143.064, "dur": 0.701, + "args": { + "External id": 459271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157150.176, "dur": 4.667, + "args": { + "External id": 459272,"Record function id": 0, "Ev Idx": 2055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157151.958, "dur": 2.380, + "args": { + "External id": 459273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157152.529, "dur": 1.240, + "args": { + "External id": 459274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157152.907, "dur": 0.756, + "args": { + "External id": 459275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157158.836, "dur": 4.789, + "args": { + "External id": 459276,"Record function id": 0, "Ev Idx": 2059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157160.548, "dur": 2.559, + "args": { + "External id": 459277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157161.103, "dur": 1.220, + "args": { + "External id": 459278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157161.504, "dur": 0.734, + "args": { + "External id": 459279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157167.413, "dur": 4.416, + "args": { + "External id": 459280,"Record function id": 0, "Ev Idx": 2063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451157168.943, "dur": 2.406, + "args": { + "External id": 459281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157169.552, "dur": 1.303, + "args": { + "External id": 459282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451157170.114, "dur": 0.660, + "args": { + "External id": 459283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451157176.718, "dur": 72056.132, + "args": { + "External id": 459284,"Record function id": 0, "Sequence number": 5036413, "Fwd thread id": 1, "Ev Idx": 2067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451157180.324, "dur": 72040.421, + "args": { + "External id": 459285,"Sequence number": 5036413, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2068 + } + }, + { + "ph": "f", "id": 177, "pid": 2338707, "tid": 2379456, "ts": 6267451157180.324, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2338707, "tid": 2379456, + "ts": 6267451157223.136, "dur": 48.509, + "args": { + "External id": 459286,"Record function id": 0, "Ev Idx": 2069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2338707, "tid": 2379456, + "ts": 6267451157281.318, "dur": 81.757, + "args": { + "External id": 459287,"Record function id": 0, "Ev Idx": 2070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2338707, "tid": 2379456, + "ts": 6267451157369.783, "dur": 71840.637, + "args": { + "External id": 459288,"Record function id": 0, "Ev Idx": 2071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451157507.176, "dur": 9.739, + "args": { + "External id": 459289,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451157533.571, "dur": 8.761, + "args": { + "External id": 459290,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451157564.398, "dur": 70590.073, + "args": { + "External id": 459291,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451157581.363, "dur": 70557.001, + "args": { + "External id": 459292,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451157712.567, "dur": 21.998, + "args": { + "External id": 459293,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451157760.155, "dur": 70320.274, + "args": { + "External id": 459294,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451157763.846, "dur": 70287.754, + "args": { + "External id": 459295,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451157769.495, "dur": 18.408, + "args": { + "External id": 459296,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451157790.122, "dur": 70256.301, + "args": { + "External id": 459297,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451228276.835, "dur": 14.031, + "args": { + "External id": 459298,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451228281.144, "dur": 9.246, + "args": { + "External id": 459299,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451228327.578, "dur": 461.571, + "args": { + "External id": 459300,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451228367.559, "dur": 414.691, + "args": { + "External id": 459301,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2084, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451228386.084, "dur": 388.979, + "args": { + "External id": 459302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451228820.425, "dur": 2.733, + "args": { + "External id": 459303,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2086, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451228898.907, "dur": 7.950, + "args": { + "External id": 459304,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451228963.753, "dur": 1.682, + "args": { + "External id": 459305,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451228986.758, "dur": 4.269, + "args": { + "External id": 459306,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451229006.330, "dur": 1.259, + "args": { + "External id": 459307,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451229022.897, "dur": 1.108, + "args": { + "External id": 459308,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451229037.299, "dur": 1.243, + "args": { + "External id": 459309,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451229088.170, "dur": 4.901, + "args": { + "External id": 459310,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451229113.510, "dur": 3.584, + "args": { + "External id": 459311,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451229132.816, "dur": 1.242, + "args": { + "External id": 459312,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451229251.630, "dur": 3619.044, + "args": { + "External id": 459313,"Record function id": 0, "Ev Idx": 2096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2338707, "tid": 2379456, + "ts": 6267451229276.850, "dur": 1368.637, + "args": { + "External id": 459314,"Record function id": 0, "Ev Idx": 2097 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338707, "tid": 2379456, + "ts": 6267451229295.959, "dur": 445.643, + "args": { + "External id": 459315,"Record function id": 0, "Ev Idx": 2098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451229406.750, "dur": 5.007, + "args": { + "External id": 459316,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451229415.630, "dur": 1.129, + "args": { + "External id": 459317,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451229419.126, "dur": 3.193, + "args": { + "External id": 459318,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451229424.724, "dur": 1.152, + "args": { + "External id": 459319,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451229427.774, "dur": 1.231, + "args": { + "External id": 459320,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451229431.011, "dur": 0.812, + "args": { + "External id": 459321,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451229433.627, "dur": 3.181, + "args": { + "External id": 459322,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451229441.022, "dur": 0.881, + "args": { + "External id": 459323,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451229443.692, "dur": 0.928, + "args": { + "External id": 459324,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451229446.867, "dur": 1.072, + "args": { + "External id": 459325,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451229468.531, "dur": 231.426, + "args": { + "External id": 459326,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451229487.581, "dur": 205.322, + "args": { + "External id": 459327,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451229527.625, "dur": 17.778, + "args": { + "External id": 459328,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451229550.087, "dur": 105.913, + "args": { + "External id": 459329,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451229555.907, "dur": 99.653, + "args": { + "External id": 459330,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451229561.306, "dur": 7.604, + "args": { + "External id": 459331,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451229571.000, "dur": 83.393, + "args": { + "External id": 459332,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2115 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2338707, "tid": 2379456, + "ts": 6267451229856.583, "dur": 779.496, + "args": { + "External id": 459333,"Record function id": 0, "Ev Idx": 2116 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338707, "tid": 2379456, + "ts": 6267451229877.401, "dur": 741.591, + "args": { + "External id": 459334,"Record function id": 0, "Ev Idx": 2117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451229955.825, "dur": 6.832, + "args": { + "External id": 459335,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451229980.511, "dur": 40.850, + "args": { + "External id": 459336,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451229986.896, "dur": 2.067, + "args": { + "External id": 459337,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451229993.242, "dur": 0.371, + "args": { + "External id": 459338,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451229995.328, "dur": 0.346, + "args": { + "External id": 459339,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451229997.290, "dur": 0.573, + "args": { + "External id": 459340,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230001.446, "dur": 0.421, + "args": { + "External id": 459341,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230003.710, "dur": 3.204, + "args": { + "External id": 459342,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230008.361, "dur": 2.241, + "args": { + "External id": 459343,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230012.281, "dur": 0.394, + "args": { + "External id": 459344,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230014.089, "dur": 0.424, + "args": { + "External id": 459345,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451230033.454, "dur": 92.903, + "args": { + "External id": 459346,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451230168.277, "dur": 146.845, + "args": { + "External id": 459347,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451230181.723, "dur": 5.155, + "args": { + "External id": 459348,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451230193.564, "dur": 13.065, + "args": { + "External id": 459349,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451230198.618, "dur": 7.499, + "args": { + "External id": 459350,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230203.219, "dur": 0.879, + "args": { + "External id": 459351,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451230215.029, "dur": 36.604, + "args": { + "External id": 459352,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230217.809, "dur": 2.216, + "args": { + "External id": 459353,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230221.903, "dur": 0.580, + "args": { + "External id": 459354,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230224.355, "dur": 2.854, + "args": { + "External id": 459355,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230231.136, "dur": 0.415, + "args": { + "External id": 459356,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230233.098, "dur": 0.445, + "args": { + "External id": 459357,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230234.926, "dur": 0.431, + "args": { + "External id": 459358,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230238.134, "dur": 0.523, + "args": { + "External id": 459359,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230240.170, "dur": 0.411, + "args": { + "External id": 459360,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451230242.342, "dur": 1.977, + "args": { + "External id": 459361,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451230267.085, "dur": 37.791, + "args": { + "External id": 459362,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451230372.047, "dur": 142.856, + "args": { + "External id": 459363,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451230406.669, "dur": 104.210, + "args": { + "External id": 459364,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2147, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451230420.487, "dur": 84.919, + "args": { + "External id": 459365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451230536.075, "dur": 2.135, + "args": { + "External id": 459366,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2149, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451230655.063, "dur": 2186.833, + "args": { + "External id": 459367,"Sequence number": 5036412, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2150 + } + }, + { + "ph": "f", "id": 178, "pid": 2338707, "tid": 2379456, "ts": 6267451230655.063, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451230790.734, "dur": 135.257, + "args": { + "External id": 459368,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451230974.798, "dur": 47.606, + "args": { + "External id": 459369,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451231042.738, "dur": 114.458, + "args": { + "External id": 459370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451231190.485, "dur": 43.478, + "args": { + "External id": 459371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451231242.537, "dur": 40.775, + "args": { + "External id": 459372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451231291.381, "dur": 33.777, + "args": { + "External id": 459373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451231335.512, "dur": 34.697, + "args": { + "External id": 459374,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451231409.286, "dur": 32.042, + "args": { + "External id": 459375,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451231463.637, "dur": 35.557, + "args": { + "External id": 459376,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451231524.758, "dur": 25.872, + "args": { + "External id": 459377,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451231566.853, "dur": 19.018, + "args": { + "External id": 459378,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451231614.626, "dur": 51.153, + "args": { + "External id": 459379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451231670.900, "dur": 40.059, + "args": { + "External id": 459380,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451231748.941, "dur": 385.815, + "args": { + "External id": 459381,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451231869.927, "dur": 12.646, + "args": { + "External id": 459382,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451231885.499, "dur": 4.615, + "args": { + "External id": 459383,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451231891.443, "dur": 3.641, + "args": { + "External id": 459384,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451231896.586, "dur": 2.148, + "args": { + "External id": 459385,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451231963.017, "dur": 9.163, + "args": { + "External id": 459386,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451231968.414, "dur": 3.530, + "args": { + "External id": 459387,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451231974.634, "dur": 42.619, + "args": { + "External id": 459388,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451231981.270, "dur": 4.580, + "args": { + "External id": 459389,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451232019.314, "dur": 2.218, + "args": { + "External id": 459390,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451232020.629, "dur": 0.807, + "args": { + "External id": 459391,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451232022.765, "dur": 18.195, + "args": { + "External id": 459392,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451232025.200, "dur": 0.672, + "args": { + "External id": 459393,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451232184.102, "dur": 36.586, + "args": { + "External id": 459394,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451232243.138, "dur": 21.612, + "args": { + "External id": 459395,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451232274.237, "dur": 60.430, + "args": { + "External id": 459396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451232342.504, "dur": 48.624, + "args": { + "External id": 459397,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451232402.390, "dur": 27.003, + "args": { + "External id": 459398,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451232436.042, "dur": 37.464, + "args": { + "External id": 459399,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451232482.025, "dur": 32.288, + "args": { + "External id": 459400,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451232523.155, "dur": 36.541, + "args": { + "External id": 459401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451232583.340, "dur": 46.414, + "args": { + "External id": 459402,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451232653.861, "dur": 29.775, + "args": { + "External id": 459403,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451232703.219, "dur": 22.771, + "args": { + "External id": 459404,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451232747.452, "dur": 18.846, + "args": { + "External id": 459405,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451232784.157, "dur": 20.157, + "args": { + "External id": 459406,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232897.423, "dur": 18.568, + "args": { + "External id": 459407,"Record function id": 0, "Ev Idx": 2190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232901.439, "dur": 13.284, + "args": { + "External id": 459408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232906.863, "dur": 6.705, + "args": { + "External id": 459409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232908.694, "dur": 4.775, + "args": { + "External id": 459410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232921.016, "dur": 5.768, + "args": { + "External id": 459411,"Record function id": 0, "Ev Idx": 2194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232922.738, "dur": 3.476, + "args": { + "External id": 459412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232923.725, "dur": 1.905, + "args": { + "External id": 459413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232924.615, "dur": 0.903, + "args": { + "External id": 459414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232931.048, "dur": 8.108, + "args": { + "External id": 459415,"Record function id": 0, "Ev Idx": 2198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232932.647, "dur": 5.957, + "args": { + "External id": 459416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232933.402, "dur": 4.682, + "args": { + "External id": 459417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232934.251, "dur": 3.706, + "args": { + "External id": 459418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232943.077, "dur": 4.990, + "args": { + "External id": 459419,"Record function id": 0, "Ev Idx": 2202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232944.679, "dur": 2.887, + "args": { + "External id": 459420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232945.497, "dur": 1.472, + "args": { + "External id": 459421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232946.050, "dur": 0.845, + "args": { + "External id": 459422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232951.927, "dur": 5.219, + "args": { + "External id": 459423,"Record function id": 0, "Ev Idx": 2206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232953.490, "dur": 3.175, + "args": { + "External id": 459424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232954.396, "dur": 1.702, + "args": { + "External id": 459425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232955.215, "dur": 0.801, + "args": { + "External id": 459426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232960.952, "dur": 5.146, + "args": { + "External id": 459427,"Record function id": 0, "Ev Idx": 2210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232962.423, "dur": 3.175, + "args": { + "External id": 459428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232963.401, "dur": 1.688, + "args": { + "External id": 459429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232964.248, "dur": 0.760, + "args": { + "External id": 459430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232970.080, "dur": 4.993, + "args": { + "External id": 459431,"Record function id": 0, "Ev Idx": 2214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232971.562, "dur": 3.002, + "args": { + "External id": 459432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232972.670, "dur": 1.385, + "args": { + "External id": 459433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232973.031, "dur": 0.942, + "args": { + "External id": 459434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232978.782, "dur": 7.154, + "args": { + "External id": 459435,"Record function id": 0, "Ev Idx": 2218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232980.135, "dur": 5.311, + "args": { + "External id": 459436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232981.048, "dur": 3.843, + "args": { + "External id": 459437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232983.986, "dur": 0.824, + "args": { + "External id": 459438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232991.451, "dur": 5.264, + "args": { + "External id": 459439,"Record function id": 0, "Ev Idx": 2222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451232993.000, "dur": 3.202, + "args": { + "External id": 459440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232994.257, "dur": 1.320, + "args": { + "External id": 459441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451232994.639, "dur": 0.857, + "args": { + "External id": 459442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451233001.455, "dur": 66366.926, + "args": { + "External id": 459443,"Record function id": 0, "Sequence number": 5036411, "Fwd thread id": 1, "Ev Idx": 2226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451233003.254, "dur": 66352.868, + "args": { + "External id": 459444,"Sequence number": 5036411, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2227 + } + }, + { + "ph": "f", "id": 179, "pid": 2338707, "tid": 2379456, "ts": 6267451233003.254, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2338707, "tid": 2379456, + "ts": 6267451233040.223, "dur": 87.859, + "args": { + "External id": 459445,"Record function id": 0, "Ev Idx": 2228 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2338707, "tid": 2379456, + "ts": 6267451233139.930, "dur": 78.494, + "args": { + "External id": 459446,"Record function id": 0, "Ev Idx": 2229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2338707, "tid": 2379456, + "ts": 6267451233225.217, "dur": 66121.235, + "args": { + "External id": 459447,"Record function id": 0, "Ev Idx": 2230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451233336.461, "dur": 8.787, + "args": { + "External id": 459448,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451233358.466, "dur": 8.447, + "args": { + "External id": 459449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451233388.721, "dur": 64897.057, + "args": { + "External id": 459450,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451233408.048, "dur": 64861.313, + "args": { + "External id": 459451,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451233537.174, "dur": 24.706, + "args": { + "External id": 459452,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451233586.721, "dur": 64628.722, + "args": { + "External id": 459453,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451233589.885, "dur": 64624.511, + "args": { + "External id": 459454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451233615.592, "dur": 20.567, + "args": { + "External id": 459455,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451233638.959, "dur": 64572.136, + "args": { + "External id": 459456,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451298408.823, "dur": 12.825, + "args": { + "External id": 459457,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451298413.150, "dur": 7.924, + "args": { + "External id": 459458,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451298457.666, "dur": 460.915, + "args": { + "External id": 459459,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451298497.304, "dur": 414.476, + "args": { + "External id": 459460,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2243, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451298513.202, "dur": 390.532, + "args": { + "External id": 459461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451298950.510, "dur": 3.099, + "args": { + "External id": 459462,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2245, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451299031.997, "dur": 8.201, + "args": { + "External id": 459463,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451299141.110, "dur": 2.661, + "args": { + "External id": 459464,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451299163.856, "dur": 4.684, + "args": { + "External id": 459465,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451299185.230, "dur": 1.159, + "args": { + "External id": 459466,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451299203.305, "dur": 1.007, + "args": { + "External id": 459467,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451299218.489, "dur": 0.876, + "args": { + "External id": 459468,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451299233.275, "dur": 3.859, + "args": { + "External id": 459469,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451299250.642, "dur": 3.546, + "args": { + "External id": 459470,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451299269.175, "dur": 1.086, + "args": { + "External id": 459471,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451299388.555, "dur": 3665.738, + "args": { + "External id": 459472,"Record function id": 0, "Ev Idx": 2255 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2338707, "tid": 2379456, + "ts": 6267451299414.345, "dur": 1373.525, + "args": { + "External id": 459473,"Record function id": 0, "Ev Idx": 2256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338707, "tid": 2379456, + "ts": 6267451299433.780, "dur": 446.563, + "args": { + "External id": 459474,"Record function id": 0, "Ev Idx": 2257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451299544.622, "dur": 5.285, + "args": { + "External id": 459475,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451299553.956, "dur": 1.220, + "args": { + "External id": 459476,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451299557.448, "dur": 3.450, + "args": { + "External id": 459477,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451299562.933, "dur": 1.304, + "args": { + "External id": 459478,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451299565.941, "dur": 1.273, + "args": { + "External id": 459479,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451299569.047, "dur": 1.077, + "args": { + "External id": 459480,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451299572.002, "dur": 2.580, + "args": { + "External id": 459481,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451299579.500, "dur": 1.115, + "args": { + "External id": 459482,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451299582.413, "dur": 1.107, + "args": { + "External id": 459483,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451299585.769, "dur": 0.771, + "args": { + "External id": 459484,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451299625.160, "dur": 214.124, + "args": { + "External id": 459485,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451299646.159, "dur": 186.912, + "args": { + "External id": 459486,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451299684.829, "dur": 20.292, + "args": { + "External id": 459487,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451299711.980, "dur": 84.488, + "args": { + "External id": 459488,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451299715.047, "dur": 81.077, + "args": { + "External id": 459489,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451299721.482, "dur": 7.850, + "args": { + "External id": 459490,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451299731.347, "dur": 63.901, + "args": { + "External id": 459491,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2274 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2338707, "tid": 2379456, + "ts": 6267451299995.337, "dur": 782.609, + "args": { + "External id": 459492,"Record function id": 0, "Ev Idx": 2275 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338707, "tid": 2379456, + "ts": 6267451300018.117, "dur": 744.273, + "args": { + "External id": 459493,"Record function id": 0, "Ev Idx": 2276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451300141.983, "dur": 7.920, + "args": { + "External id": 459494,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451300168.850, "dur": 42.777, + "args": { + "External id": 459495,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300175.213, "dur": 2.059, + "args": { + "External id": 459496,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300180.033, "dur": 1.761, + "args": { + "External id": 459497,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300183.882, "dur": 0.707, + "args": { + "External id": 459498,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300186.605, "dur": 0.362, + "args": { + "External id": 459499,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300190.620, "dur": 0.453, + "args": { + "External id": 459500,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300192.854, "dur": 2.961, + "args": { + "External id": 459501,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300197.242, "dur": 0.549, + "args": { + "External id": 459502,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300200.709, "dur": 0.547, + "args": { + "External id": 459503,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300202.942, "dur": 0.474, + "args": { + "External id": 459504,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451300224.099, "dur": 58.495, + "args": { + "External id": 459505,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451300322.425, "dur": 132.043, + "args": { + "External id": 459506,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451300334.722, "dur": 3.615, + "args": { + "External id": 459507,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451300344.561, "dur": 12.460, + "args": { + "External id": 459508,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451300349.563, "dur": 6.926, + "args": { + "External id": 459509,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300354.415, "dur": 0.619, + "args": { + "External id": 459510,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451300364.680, "dur": 33.251, + "args": { + "External id": 459511,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300367.176, "dur": 0.494, + "args": { + "External id": 459512,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300370.841, "dur": 0.341, + "args": { + "External id": 459513,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300373.089, "dur": 2.767, + "args": { + "External id": 459514,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300377.579, "dur": 1.762, + "args": { + "External id": 459515,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300380.901, "dur": 0.476, + "args": { + "External id": 459516,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300382.884, "dur": 0.499, + "args": { + "External id": 459517,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300385.685, "dur": 0.496, + "args": { + "External id": 459518,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300388.282, "dur": 0.521, + "args": { + "External id": 459519,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451300390.442, "dur": 0.833, + "args": { + "External id": 459520,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451300410.189, "dur": 34.254, + "args": { + "External id": 459521,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451300509.156, "dur": 162.441, + "args": { + "External id": 459522,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451300544.441, "dur": 122.151, + "args": { + "External id": 459523,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2306, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451300555.644, "dur": 105.477, + "args": { + "External id": 459524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451300694.436, "dur": 2.639, + "args": { + "External id": 459525,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2308, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451300797.450, "dur": 2229.193, + "args": { + "External id": 459526,"Sequence number": 5036410, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2309 + } + }, + { + "ph": "f", "id": 180, "pid": 2338707, "tid": 2379456, "ts": 6267451300797.450, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451300937.632, "dur": 177.503, + "args": { + "External id": 459527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451301172.819, "dur": 50.130, + "args": { + "External id": 459528,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451301245.019, "dur": 70.967, + "args": { + "External id": 459529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451301331.342, "dur": 38.157, + "args": { + "External id": 459530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451301376.530, "dur": 39.498, + "args": { + "External id": 459531,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451301423.170, "dur": 32.902, + "args": { + "External id": 459532,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451301464.159, "dur": 33.806, + "args": { + "External id": 459533,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451301532.445, "dur": 28.099, + "args": { + "External id": 459534,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451301583.919, "dur": 54.244, + "args": { + "External id": 459535,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451301668.821, "dur": 24.409, + "args": { + "External id": 459536,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451301709.992, "dur": 17.482, + "args": { + "External id": 459537,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451301738.618, "dur": 51.015, + "args": { + "External id": 459538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451301795.143, "dur": 40.410, + "args": { + "External id": 459539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451301875.467, "dur": 397.998, + "args": { + "External id": 459540,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451301979.407, "dur": 7.880, + "args": { + "External id": 459541,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451301989.963, "dur": 3.001, + "args": { + "External id": 459542,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451301994.764, "dur": 3.074, + "args": { + "External id": 459543,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451301999.477, "dur": 2.139, + "args": { + "External id": 459544,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451302095.750, "dur": 7.462, + "args": { + "External id": 459545,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451302098.943, "dur": 3.798, + "args": { + "External id": 459546,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451302106.257, "dur": 63.820, + "args": { + "External id": 459547,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451302114.049, "dur": 4.501, + "args": { + "External id": 459548,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451302176.739, "dur": 2.951, + "args": { + "External id": 459549,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451302178.420, "dur": 1.188, + "args": { + "External id": 459550,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451302181.518, "dur": 23.258, + "args": { + "External id": 459551,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451302185.057, "dur": 0.817, + "args": { + "External id": 459552,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451302323.302, "dur": 36.642, + "args": { + "External id": 459553,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451302383.890, "dur": 20.534, + "args": { + "External id": 459554,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451302415.871, "dur": 63.552, + "args": { + "External id": 459555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451302488.345, "dur": 50.596, + "args": { + "External id": 459556,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451302552.359, "dur": 25.205, + "args": { + "External id": 459557,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451302585.636, "dur": 61.188, + "args": { + "External id": 459558,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451302660.853, "dur": 34.919, + "args": { + "External id": 459559,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451302704.390, "dur": 37.598, + "args": { + "External id": 459560,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451302765.256, "dur": 29.640, + "args": { + "External id": 459561,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451302835.812, "dur": 30.760, + "args": { + "External id": 459562,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451302887.106, "dur": 24.124, + "args": { + "External id": 459563,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451302932.244, "dur": 18.924, + "args": { + "External id": 459564,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451302968.406, "dur": 20.619, + "args": { + "External id": 459565,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303119.318, "dur": 21.543, + "args": { + "External id": 459566,"Record function id": 0, "Ev Idx": 2349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303124.430, "dur": 14.968, + "args": { + "External id": 459567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303129.773, "dur": 7.784, + "args": { + "External id": 459568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303132.137, "dur": 5.139, + "args": { + "External id": 459569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303145.694, "dur": 6.530, + "args": { + "External id": 459570,"Record function id": 0, "Ev Idx": 2353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303147.552, "dur": 4.102, + "args": { + "External id": 459571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303148.396, "dur": 2.445, + "args": { + "External id": 459572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303149.494, "dur": 1.269, + "args": { + "External id": 459573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303156.510, "dur": 8.074, + "args": { + "External id": 459574,"Record function id": 0, "Ev Idx": 2357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303158.116, "dur": 5.954, + "args": { + "External id": 459575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303159.223, "dur": 4.340, + "args": { + "External id": 459576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303159.762, "dur": 3.728, + "args": { + "External id": 459577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303168.459, "dur": 5.231, + "args": { + "External id": 459578,"Record function id": 0, "Ev Idx": 2361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303170.134, "dur": 3.061, + "args": { + "External id": 459579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303170.993, "dur": 1.720, + "args": { + "External id": 459580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303171.505, "dur": 1.126, + "args": { + "External id": 459581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303177.630, "dur": 5.474, + "args": { + "External id": 459582,"Record function id": 0, "Ev Idx": 2365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303179.424, "dur": 3.173, + "args": { + "External id": 459583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303180.404, "dur": 1.627, + "args": { + "External id": 459584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303181.013, "dur": 0.943, + "args": { + "External id": 459585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303187.062, "dur": 5.395, + "args": { + "External id": 459586,"Record function id": 0, "Ev Idx": 2369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303188.475, "dur": 3.305, + "args": { + "External id": 459587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303189.314, "dur": 1.620, + "args": { + "External id": 459588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303189.883, "dur": 0.921, + "args": { + "External id": 459589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303196.464, "dur": 4.350, + "args": { + "External id": 459590,"Record function id": 0, "Ev Idx": 2373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303197.852, "dur": 2.440, + "args": { + "External id": 459591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303198.539, "dur": 1.262, + "args": { + "External id": 459592,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303198.897, "dur": 0.823, + "args": { + "External id": 459593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303204.650, "dur": 4.926, + "args": { + "External id": 459594,"Record function id": 0, "Ev Idx": 2377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303206.204, "dur": 2.840, + "args": { + "External id": 459595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303207.082, "dur": 1.345, + "args": { + "External id": 459596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303207.484, "dur": 0.813, + "args": { + "External id": 459597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303217.485, "dur": 5.384, + "args": { + "External id": 459598,"Record function id": 0, "Ev Idx": 2381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451303219.172, "dur": 3.165, + "args": { + "External id": 459599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303219.757, "dur": 2.060, + "args": { + "External id": 459600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451303220.944, "dur": 0.732, + "args": { + "External id": 459601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451303227.957, "dur": 70727.474, + "args": { + "External id": 459602,"Record function id": 0, "Sequence number": 5036409, "Fwd thread id": 1, "Ev Idx": 2385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451303229.576, "dur": 70714.089, + "args": { + "External id": 459603,"Sequence number": 5036409, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2386 + } + }, + { + "ph": "f", "id": 181, "pid": 2338707, "tid": 2379456, "ts": 6267451303229.576, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2338707, "tid": 2379456, + "ts": 6267451303274.424, "dur": 49.884, + "args": { + "External id": 459604,"Record function id": 0, "Ev Idx": 2387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2338707, "tid": 2379456, + "ts": 6267451303333.697, "dur": 79.986, + "args": { + "External id": 459605,"Record function id": 0, "Ev Idx": 2388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2338707, "tid": 2379456, + "ts": 6267451303420.077, "dur": 70513.542, + "args": { + "External id": 459606,"Record function id": 0, "Ev Idx": 2389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451303531.610, "dur": 8.402, + "args": { + "External id": 459607,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451303552.582, "dur": 7.761, + "args": { + "External id": 459608,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451303578.584, "dur": 69234.051, + "args": { + "External id": 459609,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451303617.597, "dur": 69178.990, + "args": { + "External id": 459610,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451303728.907, "dur": 22.404, + "args": { + "External id": 459611,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451303776.891, "dur": 68962.961, + "args": { + "External id": 459612,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451303780.764, "dur": 68957.851, + "args": { + "External id": 459613,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451303786.949, "dur": 12.429, + "args": { + "External id": 459614,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451303802.285, "dur": 68930.248, + "args": { + "External id": 459615,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451372940.128, "dur": 14.924, + "args": { + "External id": 459616,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451372944.759, "dur": 9.737, + "args": { + "External id": 459617,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451372993.579, "dur": 517.052, + "args": { + "External id": 459618,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451373028.207, "dur": 475.337, + "args": { + "External id": 459619,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2402, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451373041.907, "dur": 454.171, + "args": { + "External id": 459620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451373554.189, "dur": 3.083, + "args": { + "External id": 459621,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2404, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451373656.517, "dur": 8.713, + "args": { + "External id": 459622,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451373725.123, "dur": 1.889, + "args": { + "External id": 459623,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451373748.473, "dur": 4.233, + "args": { + "External id": 459624,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451373768.829, "dur": 1.389, + "args": { + "External id": 459625,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451373786.857, "dur": 1.109, + "args": { + "External id": 459626,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451373802.733, "dur": 1.337, + "args": { + "External id": 459627,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451373818.230, "dur": 3.885, + "args": { + "External id": 459628,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451373836.249, "dur": 4.340, + "args": { + "External id": 459629,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451373854.176, "dur": 1.183, + "args": { + "External id": 459630,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451373975.251, "dur": 3767.214, + "args": { + "External id": 459631,"Record function id": 0, "Ev Idx": 2414 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2338707, "tid": 2379456, + "ts": 6267451373999.826, "dur": 1447.743, + "args": { + "External id": 459632,"Record function id": 0, "Ev Idx": 2415 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338707, "tid": 2379456, + "ts": 6267451374022.071, "dur": 489.416, + "args": { + "External id": 459633,"Record function id": 0, "Ev Idx": 2416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451374184.275, "dur": 6.376, + "args": { + "External id": 459634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451374195.079, "dur": 1.541, + "args": { + "External id": 459635,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451374198.876, "dur": 3.440, + "args": { + "External id": 459636,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451374204.303, "dur": 1.249, + "args": { + "External id": 459637,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451374207.380, "dur": 1.133, + "args": { + "External id": 459638,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451374210.746, "dur": 1.196, + "args": { + "External id": 459639,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451374213.953, "dur": 3.293, + "args": { + "External id": 459640,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451374219.460, "dur": 1.083, + "args": { + "External id": 459641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451374222.940, "dur": 1.333, + "args": { + "External id": 459642,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451374226.191, "dur": 1.038, + "args": { + "External id": 459643,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451374250.111, "dur": 216.703, + "args": { + "External id": 459644,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451374269.786, "dur": 190.405, + "args": { + "External id": 459645,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451374309.244, "dur": 21.502, + "args": { + "External id": 459646,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451374335.900, "dur": 87.382, + "args": { + "External id": 459647,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451374339.204, "dur": 83.599, + "args": { + "External id": 459648,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374345.183, "dur": 7.745, + "args": { + "External id": 459649,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451374355.429, "dur": 66.135, + "args": { + "External id": 459650,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2433 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2338707, "tid": 2379456, + "ts": 6267451374655.482, "dur": 781.489, + "args": { + "External id": 459651,"Record function id": 0, "Ev Idx": 2434 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338707, "tid": 2379456, + "ts": 6267451374679.696, "dur": 741.365, + "args": { + "External id": 459652,"Record function id": 0, "Ev Idx": 2435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451374761.448, "dur": 7.441, + "args": { + "External id": 459653,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451374787.314, "dur": 41.493, + "args": { + "External id": 459654,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374793.646, "dur": 2.407, + "args": { + "External id": 459655,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374798.828, "dur": 1.020, + "args": { + "External id": 459656,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374802.103, "dur": 0.515, + "args": { + "External id": 459657,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374804.724, "dur": 0.755, + "args": { + "External id": 459658,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374807.359, "dur": 0.756, + "args": { + "External id": 459659,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374809.990, "dur": 2.850, + "args": { + "External id": 459660,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374814.755, "dur": 0.394, + "args": { + "External id": 459661,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374817.168, "dur": 0.466, + "args": { + "External id": 459662,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374819.981, "dur": 0.493, + "args": { + "External id": 459663,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451374841.501, "dur": 57.874, + "args": { + "External id": 459664,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451374936.928, "dur": 191.508, + "args": { + "External id": 459665,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451374949.844, "dur": 3.701, + "args": { + "External id": 459666,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451374959.973, "dur": 13.104, + "args": { + "External id": 459667,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451374965.353, "dur": 7.187, + "args": { + "External id": 459668,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374969.975, "dur": 0.957, + "args": { + "External id": 459669,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451374981.009, "dur": 35.586, + "args": { + "External id": 459670,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374984.277, "dur": 0.572, + "args": { + "External id": 459671,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374987.395, "dur": 0.526, + "args": { + "External id": 459672,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374990.165, "dur": 3.106, + "args": { + "External id": 459673,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374995.738, "dur": 0.426, + "args": { + "External id": 459674,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451374998.261, "dur": 0.516, + "args": { + "External id": 459675,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451375001.029, "dur": 0.541, + "args": { + "External id": 459676,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451375003.860, "dur": 0.477, + "args": { + "External id": 459677,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451375006.359, "dur": 0.686, + "args": { + "External id": 459678,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451375008.726, "dur": 0.536, + "args": { + "External id": 459679,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451375031.828, "dur": 84.177, + "args": { + "External id": 459680,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451375189.656, "dur": 141.255, + "args": { + "External id": 459681,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451375219.043, "dur": 103.652, + "args": { + "External id": 459682,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2465, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451375231.213, "dur": 86.230, + "args": { + "External id": 459683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451375348.323, "dur": 2.372, + "args": { + "External id": 459684,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2467, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451375455.603, "dur": 2256.522, + "args": { + "External id": 459685,"Sequence number": 5036408, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2468 + } + }, + { + "ph": "f", "id": 182, "pid": 2338707, "tid": 2379456, "ts": 6267451375455.603, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451375613.166, "dur": 136.520, + "args": { + "External id": 459686,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451375803.301, "dur": 54.615, + "args": { + "External id": 459687,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451375882.091, "dur": 68.453, + "args": { + "External id": 459688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451375966.153, "dur": 42.126, + "args": { + "External id": 459689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451376017.497, "dur": 86.913, + "args": { + "External id": 459690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451376118.065, "dur": 41.739, + "args": { + "External id": 459691,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451376169.512, "dur": 38.152, + "args": { + "External id": 459692,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451376241.072, "dur": 30.976, + "args": { + "External id": 459693,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451376295.618, "dur": 36.803, + "args": { + "External id": 459694,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451376357.959, "dur": 22.992, + "args": { + "External id": 459695,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451376396.642, "dur": 16.273, + "args": { + "External id": 459696,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451376423.328, "dur": 45.893, + "args": { + "External id": 459697,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451376473.637, "dur": 60.775, + "args": { + "External id": 459698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451376582.273, "dur": 334.912, + "args": { + "External id": 459699,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451376709.865, "dur": 8.395, + "args": { + "External id": 459700,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451376720.941, "dur": 4.324, + "args": { + "External id": 459701,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451376727.107, "dur": 2.881, + "args": { + "External id": 459702,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451376731.798, "dur": 2.514, + "args": { + "External id": 459703,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451376788.434, "dur": 6.158, + "args": { + "External id": 459704,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451376790.935, "dur": 3.398, + "args": { + "External id": 459705,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451376797.142, "dur": 41.404, + "args": { + "External id": 459706,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451376804.241, "dur": 4.705, + "args": { + "External id": 459707,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451376840.527, "dur": 2.567, + "args": { + "External id": 459708,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451376842.163, "dur": 0.839, + "args": { + "External id": 459709,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451376845.054, "dur": 17.658, + "args": { + "External id": 459710,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451376847.501, "dur": 0.743, + "args": { + "External id": 459711,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451376960.948, "dur": 36.609, + "args": { + "External id": 459712,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451377016.615, "dur": 23.768, + "args": { + "External id": 459713,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451377051.303, "dur": 100.585, + "args": { + "External id": 459714,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451377165.058, "dur": 49.062, + "args": { + "External id": 459715,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451377228.646, "dur": 25.502, + "args": { + "External id": 459716,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451377262.174, "dur": 37.569, + "args": { + "External id": 459717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451377309.318, "dur": 32.440, + "args": { + "External id": 459718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451377350.623, "dur": 36.869, + "args": { + "External id": 459719,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451377409.987, "dur": 30.481, + "args": { + "External id": 459720,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451377501.549, "dur": 31.581, + "args": { + "External id": 459721,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451377554.310, "dur": 22.003, + "args": { + "External id": 459722,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451377613.582, "dur": 21.347, + "args": { + "External id": 459723,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451377654.334, "dur": 19.794, + "args": { + "External id": 459724,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377769.184, "dur": 19.036, + "args": { + "External id": 459725,"Record function id": 0, "Ev Idx": 2508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377773.622, "dur": 13.324, + "args": { + "External id": 459726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377778.645, "dur": 7.205, + "args": { + "External id": 459727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377780.822, "dur": 4.871, + "args": { + "External id": 459728,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377793.197, "dur": 6.615, + "args": { + "External id": 459729,"Record function id": 0, "Ev Idx": 2512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377795.164, "dur": 4.098, + "args": { + "External id": 459730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377796.031, "dur": 2.521, + "args": { + "External id": 459731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377797.080, "dur": 1.364, + "args": { + "External id": 459732,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377803.963, "dur": 7.328, + "args": { + "External id": 459733,"Record function id": 0, "Ev Idx": 2516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377805.592, "dur": 5.180, + "args": { + "External id": 459734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377806.317, "dur": 3.897, + "args": { + "External id": 459735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377806.876, "dur": 3.241, + "args": { + "External id": 459736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377815.431, "dur": 5.161, + "args": { + "External id": 459737,"Record function id": 0, "Ev Idx": 2520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377817.305, "dur": 2.785, + "args": { + "External id": 459738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377817.875, "dur": 1.739, + "args": { + "External id": 459739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377818.583, "dur": 0.937, + "args": { + "External id": 459740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377824.672, "dur": 4.574, + "args": { + "External id": 459741,"Record function id": 0, "Ev Idx": 2524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377826.117, "dur": 2.614, + "args": { + "External id": 459742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377826.963, "dur": 1.294, + "args": { + "External id": 459743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377827.349, "dur": 0.828, + "args": { + "External id": 459744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377833.450, "dur": 4.561, + "args": { + "External id": 459745,"Record function id": 0, "Ev Idx": 2528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377834.818, "dur": 2.689, + "args": { + "External id": 459746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377835.478, "dur": 1.551, + "args": { + "External id": 459747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377836.185, "dur": 0.723, + "args": { + "External id": 459748,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377842.258, "dur": 4.535, + "args": { + "External id": 459749,"Record function id": 0, "Ev Idx": 2532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377843.638, "dur": 2.591, + "args": { + "External id": 459750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377844.457, "dur": 1.291, + "args": { + "External id": 459751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377844.917, "dur": 0.714, + "args": { + "External id": 459752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377850.752, "dur": 4.880, + "args": { + "External id": 459753,"Record function id": 0, "Ev Idx": 2536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377852.408, "dur": 2.690, + "args": { + "External id": 459754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377853.053, "dur": 1.565, + "args": { + "External id": 459755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377853.714, "dur": 0.823, + "args": { + "External id": 459756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377859.648, "dur": 4.895, + "args": { + "External id": 459757,"Record function id": 0, "Ev Idx": 2540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451377861.223, "dur": 2.794, + "args": { + "External id": 459758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377862.135, "dur": 1.316, + "args": { + "External id": 459759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451377862.617, "dur": 0.727, + "args": { + "External id": 459760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451377869.535, "dur": 65597.868, + "args": { + "External id": 459761,"Record function id": 0, "Sequence number": 5036407, "Fwd thread id": 1, "Ev Idx": 2544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451377871.246, "dur": 65585.171, + "args": { + "External id": 459762,"Sequence number": 5036407, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2545 + } + }, + { + "ph": "f", "id": 183, "pid": 2338707, "tid": 2379456, "ts": 6267451377871.246, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2338707, "tid": 2379456, + "ts": 6267451377911.773, "dur": 46.337, + "args": { + "External id": 459763,"Record function id": 0, "Ev Idx": 2546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2338707, "tid": 2379456, + "ts": 6267451377967.220, "dur": 77.683, + "args": { + "External id": 459764,"Record function id": 0, "Ev Idx": 2547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2338707, "tid": 2379456, + "ts": 6267451378052.152, "dur": 65394.428, + "args": { + "External id": 459765,"Record function id": 0, "Ev Idx": 2548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451378203.003, "dur": 9.189, + "args": { + "External id": 459766,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451378226.598, "dur": 8.042, + "args": { + "External id": 459767,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451378254.410, "dur": 64054.031, + "args": { + "External id": 459768,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451378271.838, "dur": 64019.903, + "args": { + "External id": 459769,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451378402.080, "dur": 22.612, + "args": { + "External id": 459770,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451378450.092, "dur": 63786.222, + "args": { + "External id": 459771,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451378453.573, "dur": 63781.611, + "args": { + "External id": 459772,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451378459.436, "dur": 12.300, + "args": { + "External id": 459773,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451378477.424, "dur": 63753.059, + "args": { + "External id": 459774,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451442442.070, "dur": 14.705, + "args": { + "External id": 459775,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451442446.708, "dur": 9.620, + "args": { + "External id": 459776,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451442532.415, "dur": 481.578, + "args": { + "External id": 459777,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451442571.108, "dur": 436.111, + "args": { + "External id": 459778,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2561, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451442585.944, "dur": 414.747, + "args": { + "External id": 459779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451443041.360, "dur": 2.676, + "args": { + "External id": 459780,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2563, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451443170.332, "dur": 8.540, + "args": { + "External id": 459781,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451443239.194, "dur": 1.677, + "args": { + "External id": 459782,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451443262.089, "dur": 3.866, + "args": { + "External id": 459783,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451443281.867, "dur": 1.504, + "args": { + "External id": 459784,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451443298.742, "dur": 1.064, + "args": { + "External id": 459785,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451443314.816, "dur": 1.544, + "args": { + "External id": 459786,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451443330.644, "dur": 3.771, + "args": { + "External id": 459787,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451443349.295, "dur": 3.576, + "args": { + "External id": 459788,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451443366.967, "dur": 1.150, + "args": { + "External id": 459789,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451443486.691, "dur": 3731.990, + "args": { + "External id": 459790,"Record function id": 0, "Ev Idx": 2573 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2338707, "tid": 2379456, + "ts": 6267451443510.128, "dur": 1376.580, + "args": { + "External id": 459791,"Record function id": 0, "Ev Idx": 2574 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338707, "tid": 2379456, + "ts": 6267451443529.909, "dur": 447.123, + "args": { + "External id": 459792,"Record function id": 0, "Ev Idx": 2575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451443660.140, "dur": 6.018, + "args": { + "External id": 459793,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451443670.522, "dur": 2.018, + "args": { + "External id": 459794,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451443674.832, "dur": 3.399, + "args": { + "External id": 459795,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451443680.595, "dur": 1.531, + "args": { + "External id": 459796,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451443683.845, "dur": 1.118, + "args": { + "External id": 459797,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451443687.123, "dur": 1.219, + "args": { + "External id": 459798,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451443690.526, "dur": 3.225, + "args": { + "External id": 459799,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451443695.658, "dur": 1.454, + "args": { + "External id": 459800,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451443698.778, "dur": 1.573, + "args": { + "External id": 459801,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451443702.010, "dur": 1.173, + "args": { + "External id": 459802,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451443725.404, "dur": 210.155, + "args": { + "External id": 459803,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451443745.163, "dur": 184.160, + "args": { + "External id": 459804,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451443781.095, "dur": 19.848, + "args": { + "External id": 459805,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451443805.243, "dur": 86.933, + "args": { + "External id": 459806,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451443808.734, "dur": 82.918, + "args": { + "External id": 459807,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451443815.318, "dur": 7.205, + "args": { + "External id": 459808,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451443824.868, "dur": 66.035, + "args": { + "External id": 459809,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2338707, "tid": 2379456, + "ts": 6267451444135.072, "dur": 741.602, + "args": { + "External id": 459810,"Record function id": 0, "Ev Idx": 2593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338707, "tid": 2379456, + "ts": 6267451444157.027, "dur": 703.354, + "args": { + "External id": 459811,"Record function id": 0, "Ev Idx": 2594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451444237.581, "dur": 6.897, + "args": { + "External id": 459812,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451444263.206, "dur": 42.541, + "args": { + "External id": 459813,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444270.274, "dur": 2.003, + "args": { + "External id": 459814,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444275.159, "dur": 0.930, + "args": { + "External id": 459815,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444278.175, "dur": 0.525, + "args": { + "External id": 459816,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444281.171, "dur": 0.692, + "args": { + "External id": 459817,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444284.298, "dur": 0.605, + "args": { + "External id": 459818,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444286.890, "dur": 2.702, + "args": { + "External id": 459819,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444291.667, "dur": 0.419, + "args": { + "External id": 459820,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444294.393, "dur": 0.627, + "args": { + "External id": 459821,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444296.936, "dur": 0.537, + "args": { + "External id": 459822,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451444318.100, "dur": 56.030, + "args": { + "External id": 459823,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451444413.390, "dur": 141.008, + "args": { + "External id": 459824,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451444426.751, "dur": 3.926, + "args": { + "External id": 459825,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451444436.834, "dur": 13.224, + "args": { + "External id": 459826,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451444442.292, "dur": 7.208, + "args": { + "External id": 459827,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444446.850, "dur": 1.120, + "args": { + "External id": 459828,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451444458.513, "dur": 34.333, + "args": { + "External id": 459829,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444461.537, "dur": 0.619, + "args": { + "External id": 459830,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444464.219, "dur": 0.594, + "args": { + "External id": 459831,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444466.729, "dur": 2.857, + "args": { + "External id": 459832,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444471.575, "dur": 0.736, + "args": { + "External id": 459833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444474.729, "dur": 0.717, + "args": { + "External id": 459834,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444477.782, "dur": 0.532, + "args": { + "External id": 459835,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444480.445, "dur": 0.478, + "args": { + "External id": 459836,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444483.322, "dur": 0.485, + "args": { + "External id": 459837,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451444485.665, "dur": 0.438, + "args": { + "External id": 459838,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451444507.350, "dur": 37.262, + "args": { + "External id": 459839,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451444636.706, "dur": 139.226, + "args": { + "External id": 459840,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451444664.538, "dur": 106.999, + "args": { + "External id": 459841,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2624, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451444677.909, "dur": 88.852, + "args": { + "External id": 459842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451444792.583, "dur": 2.108, + "args": { + "External id": 459843,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2626, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451444895.232, "dur": 2295.664, + "args": { + "External id": 459844,"Sequence number": 5036406, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2627 + } + }, + { + "ph": "f", "id": 184, "pid": 2338707, "tid": 2379456, "ts": 6267451444895.232, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451445033.283, "dur": 178.012, + "args": { + "External id": 459845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451445264.692, "dur": 52.059, + "args": { + "External id": 459846,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451445342.374, "dur": 69.543, + "args": { + "External id": 459847,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451445426.537, "dur": 40.273, + "args": { + "External id": 459848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451445476.306, "dur": 43.524, + "args": { + "External id": 459849,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451445529.406, "dur": 35.571, + "args": { + "External id": 459850,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451445574.642, "dur": 56.475, + "args": { + "External id": 459851,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451445666.856, "dur": 30.622, + "args": { + "External id": 459852,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451445720.017, "dur": 36.260, + "args": { + "External id": 459853,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451445782.878, "dur": 24.793, + "args": { + "External id": 459854,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451445823.278, "dur": 20.314, + "args": { + "External id": 459855,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451445854.890, "dur": 55.535, + "args": { + "External id": 459856,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451445915.195, "dur": 43.634, + "args": { + "External id": 459857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451445997.444, "dur": 412.275, + "args": { + "External id": 459858,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451446172.405, "dur": 10.734, + "args": { + "External id": 459859,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451446186.755, "dur": 3.888, + "args": { + "External id": 459860,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451446192.383, "dur": 2.873, + "args": { + "External id": 459861,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451446197.059, "dur": 2.464, + "args": { + "External id": 459862,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451446268.219, "dur": 10.472, + "args": { + "External id": 459863,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451446271.252, "dur": 7.207, + "args": { + "External id": 459864,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451446281.290, "dur": 44.496, + "args": { + "External id": 459865,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451446288.731, "dur": 4.613, + "args": { + "External id": 459866,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451446328.064, "dur": 2.462, + "args": { + "External id": 459867,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451446329.669, "dur": 0.763, + "args": { + "External id": 459868,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451446332.064, "dur": 20.195, + "args": { + "External id": 459869,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451446334.920, "dur": 0.802, + "args": { + "External id": 459870,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451446457.268, "dur": 34.985, + "args": { + "External id": 459871,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451446512.741, "dur": 21.875, + "args": { + "External id": 459872,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451446545.831, "dur": 84.217, + "args": { + "External id": 459873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451446642.022, "dur": 57.836, + "args": { + "External id": 459874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451446715.852, "dur": 29.139, + "args": { + "External id": 459875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451446753.226, "dur": 39.328, + "args": { + "External id": 459876,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451446802.976, "dur": 34.382, + "args": { + "External id": 459877,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451446846.291, "dur": 37.661, + "args": { + "External id": 459878,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451446907.621, "dur": 31.762, + "args": { + "External id": 459879,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451446958.661, "dur": 31.076, + "args": { + "External id": 459880,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451447006.150, "dur": 23.048, + "args": { + "External id": 459881,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451447048.820, "dur": 57.644, + "args": { + "External id": 459882,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451447127.099, "dur": 24.162, + "args": { + "External id": 459883,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447246.431, "dur": 19.448, + "args": { + "External id": 459884,"Record function id": 0, "Ev Idx": 2667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447251.006, "dur": 13.593, + "args": { + "External id": 459885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447256.097, "dur": 7.481, + "args": { + "External id": 459886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447258.195, "dur": 5.229, + "args": { + "External id": 459887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447271.000, "dur": 6.684, + "args": { + "External id": 459888,"Record function id": 0, "Ev Idx": 2671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447272.872, "dur": 4.242, + "args": { + "External id": 459889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447273.871, "dur": 2.622, + "args": { + "External id": 459890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447275.161, "dur": 1.224, + "args": { + "External id": 459891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447282.064, "dur": 7.509, + "args": { + "External id": 459892,"Record function id": 0, "Ev Idx": 2675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447283.494, "dur": 5.517, + "args": { + "External id": 459893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447284.246, "dur": 4.154, + "args": { + "External id": 459894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447284.691, "dur": 3.639, + "args": { + "External id": 459895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447293.720, "dur": 5.193, + "args": { + "External id": 459896,"Record function id": 0, "Ev Idx": 2679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447295.780, "dur": 2.617, + "args": { + "External id": 459897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447296.537, "dur": 1.379, + "args": { + "External id": 459898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447296.909, "dur": 0.930, + "args": { + "External id": 459899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447303.006, "dur": 5.102, + "args": { + "External id": 459900,"Record function id": 0, "Ev Idx": 2683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447304.481, "dur": 3.098, + "args": { + "External id": 459901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447305.285, "dur": 1.677, + "args": { + "External id": 459902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447306.018, "dur": 0.866, + "args": { + "External id": 459903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447312.437, "dur": 4.787, + "args": { + "External id": 459904,"Record function id": 0, "Ev Idx": 2687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447313.844, "dur": 2.863, + "args": { + "External id": 459905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447314.770, "dur": 1.388, + "args": { + "External id": 459906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447315.311, "dur": 0.729, + "args": { + "External id": 459907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447321.282, "dur": 4.702, + "args": { + "External id": 459908,"Record function id": 0, "Ev Idx": 2691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447322.886, "dur": 2.545, + "args": { + "External id": 459909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447323.439, "dur": 1.280, + "args": { + "External id": 459910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447323.781, "dur": 0.822, + "args": { + "External id": 459911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447329.962, "dur": 4.545, + "args": { + "External id": 459912,"Record function id": 0, "Ev Idx": 2695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447331.337, "dur": 2.659, + "args": { + "External id": 459913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447332.099, "dur": 1.146, + "args": { + "External id": 459914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447332.409, "dur": 0.759, + "args": { + "External id": 459915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447338.522, "dur": 5.272, + "args": { + "External id": 459916,"Record function id": 0, "Ev Idx": 2699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451447340.578, "dur": 2.703, + "args": { + "External id": 459917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447341.176, "dur": 1.630, + "args": { + "External id": 459918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451447341.973, "dur": 0.721, + "args": { + "External id": 459919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451447348.659, "dur": 64006.960, + "args": { + "External id": 459920,"Record function id": 0, "Sequence number": 5036405, "Fwd thread id": 1, "Ev Idx": 2703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451447350.349, "dur": 63993.763, + "args": { + "External id": 459921,"Sequence number": 5036405, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2704 + } + }, + { + "ph": "f", "id": 185, "pid": 2338707, "tid": 2379456, "ts": 6267451447350.349, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2338707, "tid": 2379456, + "ts": 6267451447387.375, "dur": 48.698, + "args": { + "External id": 459922,"Record function id": 0, "Ev Idx": 2705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2338707, "tid": 2379456, + "ts": 6267451447445.733, "dur": 79.723, + "args": { + "External id": 459923,"Record function id": 0, "Ev Idx": 2706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2338707, "tid": 2379456, + "ts": 6267451447533.176, "dur": 63799.960, + "args": { + "External id": 459924,"Record function id": 0, "Ev Idx": 2707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451447664.357, "dur": 9.549, + "args": { + "External id": 459925,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451447687.888, "dur": 8.356, + "args": { + "External id": 459926,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451447716.776, "dur": 62473.725, + "args": { + "External id": 459927,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451447734.300, "dur": 62439.423, + "args": { + "External id": 459928,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451447912.013, "dur": 29.999, + "args": { + "External id": 459929,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451447967.586, "dur": 62139.172, + "args": { + "External id": 459930,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451447971.268, "dur": 62134.255, + "args": { + "External id": 459931,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451447977.035, "dur": 12.272, + "args": { + "External id": 459932,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451447991.904, "dur": 62108.347, + "args": { + "External id": 459933,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451510323.551, "dur": 15.188, + "args": { + "External id": 459934,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451510328.634, "dur": 9.635, + "args": { + "External id": 459935,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451510378.764, "dur": 510.593, + "args": { + "External id": 459936,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451510413.486, "dur": 468.569, + "args": { + "External id": 459937,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2720, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451510427.631, "dur": 446.399, + "args": { + "External id": 459938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451510918.728, "dur": 2.767, + "args": { + "External id": 459939,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2722, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451511001.395, "dur": 7.709, + "args": { + "External id": 459940,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451511109.761, "dur": 2.969, + "args": { + "External id": 459941,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451511135.828, "dur": 4.142, + "args": { + "External id": 459942,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451511156.794, "dur": 1.257, + "args": { + "External id": 459943,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451511172.072, "dur": 1.190, + "args": { + "External id": 459944,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451511189.265, "dur": 1.310, + "args": { + "External id": 459945,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451511206.183, "dur": 3.639, + "args": { + "External id": 459946,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451511224.492, "dur": 3.010, + "args": { + "External id": 459947,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451511252.123, "dur": 1.225, + "args": { + "External id": 459948,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451511376.219, "dur": 3743.112, + "args": { + "External id": 459949,"Record function id": 0, "Ev Idx": 2732 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2338707, "tid": 2379456, + "ts": 6267451511400.492, "dur": 1412.927, + "args": { + "External id": 459950,"Record function id": 0, "Ev Idx": 2733 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338707, "tid": 2379456, + "ts": 6267451511421.122, "dur": 453.630, + "args": { + "External id": 459951,"Record function id": 0, "Ev Idx": 2734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451511533.995, "dur": 5.456, + "args": { + "External id": 459952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451511543.545, "dur": 1.225, + "args": { + "External id": 459953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451511547.041, "dur": 3.874, + "args": { + "External id": 459954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451511552.943, "dur": 1.468, + "args": { + "External id": 459955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451511556.231, "dur": 1.057, + "args": { + "External id": 459956,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451511559.370, "dur": 1.364, + "args": { + "External id": 459957,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451511562.719, "dur": 2.346, + "args": { + "External id": 459958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451511566.891, "dur": 0.831, + "args": { + "External id": 459959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451511569.700, "dur": 1.207, + "args": { + "External id": 459960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451511572.460, "dur": 0.877, + "args": { + "External id": 459961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451511612.505, "dur": 216.232, + "args": { + "External id": 459962,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451511633.232, "dur": 188.207, + "args": { + "External id": 459963,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451511664.927, "dur": 19.355, + "args": { + "External id": 459964,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451511688.860, "dur": 91.628, + "args": { + "External id": 459965,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451511692.042, "dur": 87.949, + "args": { + "External id": 459966,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451511698.012, "dur": 7.318, + "args": { + "External id": 459967,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451511707.552, "dur": 71.656, + "args": { + "External id": 459968,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2751 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2338707, "tid": 2379456, + "ts": 6267451511992.416, "dur": 809.760, + "args": { + "External id": 459969,"Record function id": 0, "Ev Idx": 2752 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338707, "tid": 2379456, + "ts": 6267451512014.263, "dur": 771.156, + "args": { + "External id": 459970,"Record function id": 0, "Ev Idx": 2753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451512143.011, "dur": 8.151, + "args": { + "External id": 459971,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451512170.896, "dur": 39.735, + "args": { + "External id": 459972,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512177.888, "dur": 2.434, + "args": { + "External id": 459973,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512183.065, "dur": 0.450, + "args": { + "External id": 459974,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512186.072, "dur": 0.593, + "args": { + "External id": 459975,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512188.730, "dur": 0.533, + "args": { + "External id": 459976,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512191.114, "dur": 0.585, + "args": { + "External id": 459977,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512193.841, "dur": 2.636, + "args": { + "External id": 459978,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512198.413, "dur": 0.635, + "args": { + "External id": 459979,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512201.072, "dur": 0.464, + "args": { + "External id": 459980,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512203.987, "dur": 0.460, + "args": { + "External id": 459981,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451512229.019, "dur": 63.183, + "args": { + "External id": 459982,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451512332.376, "dur": 144.858, + "args": { + "External id": 459983,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451512345.547, "dur": 4.653, + "args": { + "External id": 459984,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451512357.146, "dur": 12.601, + "args": { + "External id": 459985,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451512362.398, "dur": 6.811, + "args": { + "External id": 459986,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512367.044, "dur": 0.663, + "args": { + "External id": 459987,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451512378.414, "dur": 32.507, + "args": { + "External id": 459988,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512381.690, "dur": 0.794, + "args": { + "External id": 459989,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512385.338, "dur": 0.428, + "args": { + "External id": 459990,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512387.882, "dur": 2.428, + "args": { + "External id": 459991,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512392.508, "dur": 0.510, + "args": { + "External id": 459992,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512394.917, "dur": 0.426, + "args": { + "External id": 459993,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512397.509, "dur": 0.333, + "args": { + "External id": 459994,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512399.654, "dur": 0.456, + "args": { + "External id": 459995,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512402.053, "dur": 0.377, + "args": { + "External id": 459996,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451512404.753, "dur": 0.615, + "args": { + "External id": 459997,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451512425.800, "dur": 38.750, + "args": { + "External id": 459998,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451512536.605, "dur": 161.077, + "args": { + "External id": 459999,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451512564.858, "dur": 127.488, + "args": { + "External id": 460000,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2783, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451512577.216, "dur": 109.492, + "args": { + "External id": 460001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451512716.755, "dur": 2.199, + "args": { + "External id": 460002,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2785, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451512823.014, "dur": 2228.430, + "args": { + "External id": 460003,"Sequence number": 5036404, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2786 + } + }, + { + "ph": "f", "id": 186, "pid": 2338707, "tid": 2379456, "ts": 6267451512823.014, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451512960.348, "dur": 180.055, + "args": { + "External id": 460004,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451513194.975, "dur": 50.435, + "args": { + "External id": 460005,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451513270.752, "dur": 71.145, + "args": { + "External id": 460006,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451513356.737, "dur": 40.119, + "args": { + "External id": 460007,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451513405.646, "dur": 40.849, + "args": { + "External id": 460008,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451513455.500, "dur": 33.408, + "args": { + "External id": 460009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451513498.077, "dur": 36.490, + "args": { + "External id": 460010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451513565.355, "dur": 45.618, + "args": { + "External id": 460011,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451513636.638, "dur": 41.476, + "args": { + "External id": 460012,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451513704.619, "dur": 23.288, + "args": { + "External id": 460013,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451513747.105, "dur": 16.886, + "args": { + "External id": 460014,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451513775.643, "dur": 66.522, + "args": { + "External id": 460015,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451513853.331, "dur": 54.481, + "args": { + "External id": 460016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451513953.445, "dur": 372.253, + "args": { + "External id": 460017,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451514099.322, "dur": 9.594, + "args": { + "External id": 460018,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451514112.506, "dur": 4.062, + "args": { + "External id": 460019,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451514118.537, "dur": 2.275, + "args": { + "External id": 460020,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451514122.716, "dur": 2.622, + "args": { + "External id": 460021,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451514186.550, "dur": 6.333, + "args": { + "External id": 460022,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451514189.146, "dur": 3.513, + "args": { + "External id": 460023,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451514195.637, "dur": 43.406, + "args": { + "External id": 460024,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451514202.567, "dur": 4.429, + "args": { + "External id": 460025,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451514240.921, "dur": 2.687, + "args": { + "External id": 460026,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451514242.849, "dur": 0.669, + "args": { + "External id": 460027,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451514245.346, "dur": 19.138, + "args": { + "External id": 460028,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451514248.013, "dur": 0.619, + "args": { + "External id": 460029,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451514373.477, "dur": 36.195, + "args": { + "External id": 460030,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451514430.573, "dur": 22.301, + "args": { + "External id": 460031,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451514464.238, "dur": 66.191, + "args": { + "External id": 460032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451514539.027, "dur": 69.184, + "args": { + "External id": 460033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451514628.108, "dur": 33.204, + "args": { + "External id": 460034,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451514669.942, "dur": 40.685, + "args": { + "External id": 460035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451514720.068, "dur": 33.173, + "args": { + "External id": 460036,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451514762.529, "dur": 36.947, + "args": { + "External id": 460037,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451514823.307, "dur": 31.339, + "args": { + "External id": 460038,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451514873.961, "dur": 30.452, + "args": { + "External id": 460039,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451514921.415, "dur": 21.116, + "args": { + "External id": 460040,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451514962.308, "dur": 17.518, + "args": { + "External id": 460041,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451514995.538, "dur": 18.990, + "args": { + "External id": 460042,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515147.561, "dur": 19.031, + "args": { + "External id": 460043,"Record function id": 0, "Ev Idx": 2826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515152.065, "dur": 13.133, + "args": { + "External id": 460044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515157.159, "dur": 6.930, + "args": { + "External id": 460045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515159.034, "dur": 4.923, + "args": { + "External id": 460046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515171.874, "dur": 6.157, + "args": { + "External id": 460047,"Record function id": 0, "Ev Idx": 2830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515173.673, "dur": 3.810, + "args": { + "External id": 460048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515174.576, "dur": 2.331, + "args": { + "External id": 460049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515175.533, "dur": 1.262, + "args": { + "External id": 460050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515182.522, "dur": 7.345, + "args": { + "External id": 460051,"Record function id": 0, "Ev Idx": 2834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515184.188, "dur": 5.195, + "args": { + "External id": 460052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515184.750, "dur": 4.129, + "args": { + "External id": 460053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515185.089, "dur": 3.693, + "args": { + "External id": 460054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515194.419, "dur": 5.011, + "args": { + "External id": 460055,"Record function id": 0, "Ev Idx": 2838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515196.259, "dur": 2.664, + "args": { + "External id": 460056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515196.829, "dur": 1.474, + "args": { + "External id": 460057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515197.315, "dur": 0.901, + "args": { + "External id": 460058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515203.521, "dur": 4.593, + "args": { + "External id": 460059,"Record function id": 0, "Ev Idx": 2842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515204.908, "dur": 2.725, + "args": { + "External id": 460060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515205.632, "dur": 1.530, + "args": { + "External id": 460061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515206.077, "dur": 0.997, + "args": { + "External id": 460062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515212.561, "dur": 5.434, + "args": { + "External id": 460063,"Record function id": 0, "Ev Idx": 2846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515214.603, "dur": 2.909, + "args": { + "External id": 460064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515215.144, "dur": 1.878, + "args": { + "External id": 460065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515215.985, "dur": 0.884, + "args": { + "External id": 460066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515222.133, "dur": 4.724, + "args": { + "External id": 460067,"Record function id": 0, "Ev Idx": 2850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515224.063, "dur": 2.326, + "args": { + "External id": 460068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515224.703, "dur": 1.208, + "args": { + "External id": 460069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515225.046, "dur": 0.715, + "args": { + "External id": 460070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515231.166, "dur": 4.267, + "args": { + "External id": 460071,"Record function id": 0, "Ev Idx": 2854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515232.688, "dur": 2.235, + "args": { + "External id": 460072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515233.229, "dur": 1.222, + "args": { + "External id": 460073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515233.587, "dur": 0.749, + "args": { + "External id": 460074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515239.365, "dur": 4.847, + "args": { + "External id": 460075,"Record function id": 0, "Ev Idx": 2858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451515241.079, "dur": 2.657, + "args": { + "External id": 460076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515241.616, "dur": 1.380, + "args": { + "External id": 460077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451515242.187, "dur": 0.669, + "args": { + "External id": 460078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451515249.072, "dur": 59781.331, + "args": { + "External id": 460079,"Record function id": 0, "Sequence number": 5036403, "Fwd thread id": 1, "Ev Idx": 2862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451515250.952, "dur": 59768.076, + "args": { + "External id": 460080,"Sequence number": 5036403, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2863 + } + }, + { + "ph": "f", "id": 187, "pid": 2338707, "tid": 2379456, "ts": 6267451515250.952, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2338707, "tid": 2379456, + "ts": 6267451515288.986, "dur": 47.817, + "args": { + "External id": 460081,"Record function id": 0, "Ev Idx": 2864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2338707, "tid": 2379456, + "ts": 6267451515346.657, "dur": 79.366, + "args": { + "External id": 460082,"Record function id": 0, "Ev Idx": 2865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2338707, "tid": 2379456, + "ts": 6267451515433.489, "dur": 59575.105, + "args": { + "External id": 460083,"Record function id": 0, "Ev Idx": 2866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451515545.751, "dur": 8.652, + "args": { + "External id": 460084,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451515580.639, "dur": 9.166, + "args": { + "External id": 460085,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451515632.730, "dur": 58312.951, + "args": { + "External id": 460086,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451515651.861, "dur": 58278.852, + "args": { + "External id": 460087,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451515814.880, "dur": 21.497, + "args": { + "External id": 460088,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451515862.423, "dur": 58016.060, + "args": { + "External id": 460089,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451515866.143, "dur": 58011.295, + "args": { + "External id": 460090,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451515872.097, "dur": 13.712, + "args": { + "External id": 460091,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451515888.616, "dur": 57983.835, + "args": { + "External id": 460092,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451574107.580, "dur": 14.889, + "args": { + "External id": 460093,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451574112.113, "dur": 9.617, + "args": { + "External id": 460094,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451574161.653, "dur": 466.978, + "args": { + "External id": 460095,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451574196.377, "dur": 425.130, + "args": { + "External id": 460096,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2879, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451574211.086, "dur": 402.473, + "args": { + "External id": 460097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451574654.299, "dur": 3.044, + "args": { + "External id": 460098,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2881, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451574736.806, "dur": 8.069, + "args": { + "External id": 460099,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451574802.636, "dur": 1.866, + "args": { + "External id": 460100,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451574825.653, "dur": 3.368, + "args": { + "External id": 460101,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451574845.926, "dur": 1.105, + "args": { + "External id": 460102,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451574862.552, "dur": 1.375, + "args": { + "External id": 460103,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451574878.519, "dur": 1.114, + "args": { + "External id": 460104,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451574894.773, "dur": 4.083, + "args": { + "External id": 460105,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451574914.386, "dur": 2.366, + "args": { + "External id": 460106,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451574931.542, "dur": 1.136, + "args": { + "External id": 460107,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451575048.600, "dur": 3744.287, + "args": { + "External id": 460108,"Record function id": 0, "Ev Idx": 2891 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2338707, "tid": 2379456, + "ts": 6267451575117.387, "dur": 1376.774, + "args": { + "External id": 460109,"Record function id": 0, "Ev Idx": 2892 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338707, "tid": 2379456, + "ts": 6267451575136.927, "dur": 443.397, + "args": { + "External id": 460110,"Record function id": 0, "Ev Idx": 2893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451575259.692, "dur": 6.099, + "args": { + "External id": 460111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451575270.186, "dur": 1.201, + "args": { + "External id": 460112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451575273.710, "dur": 3.411, + "args": { + "External id": 460113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451575279.054, "dur": 1.073, + "args": { + "External id": 460114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451575281.832, "dur": 1.137, + "args": { + "External id": 460115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451575284.890, "dur": 0.977, + "args": { + "External id": 460116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451575287.655, "dur": 2.512, + "args": { + "External id": 460117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451575292.118, "dur": 1.071, + "args": { + "External id": 460118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451575294.843, "dur": 1.126, + "args": { + "External id": 460119,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451575297.544, "dur": 0.916, + "args": { + "External id": 460120,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451575337.596, "dur": 202.892, + "args": { + "External id": 460121,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451575357.195, "dur": 177.235, + "args": { + "External id": 460122,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451575388.535, "dur": 20.164, + "args": { + "External id": 460123,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451575413.309, "dur": 84.388, + "args": { + "External id": 460124,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451575416.760, "dur": 80.495, + "args": { + "External id": 460125,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451575422.272, "dur": 7.027, + "args": { + "External id": 460126,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451575431.768, "dur": 64.896, + "args": { + "External id": 460127,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2338707, "tid": 2379456, + "ts": 6267451575720.279, "dur": 763.444, + "args": { + "External id": 460128,"Record function id": 0, "Ev Idx": 2911 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338707, "tid": 2379456, + "ts": 6267451575744.502, "dur": 724.225, + "args": { + "External id": 460129,"Record function id": 0, "Ev Idx": 2912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451575825.110, "dur": 7.718, + "args": { + "External id": 460130,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451575851.589, "dur": 38.057, + "args": { + "External id": 460131,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451575858.161, "dur": 2.065, + "args": { + "External id": 460132,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451575863.119, "dur": 0.640, + "args": { + "External id": 460133,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451575865.616, "dur": 0.719, + "args": { + "External id": 460134,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451575868.185, "dur": 0.588, + "args": { + "External id": 460135,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451575870.729, "dur": 0.482, + "args": { + "External id": 460136,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451575873.115, "dur": 2.987, + "args": { + "External id": 460137,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451575877.959, "dur": 0.555, + "args": { + "External id": 460138,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451575880.198, "dur": 0.514, + "args": { + "External id": 460139,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451575882.570, "dur": 0.452, + "args": { + "External id": 460140,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451575901.829, "dur": 53.842, + "args": { + "External id": 460141,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451575993.299, "dur": 184.203, + "args": { + "External id": 460142,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451576006.256, "dur": 4.433, + "args": { + "External id": 460143,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451576017.712, "dur": 12.307, + "args": { + "External id": 460144,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451576023.002, "dur": 6.488, + "args": { + "External id": 460145,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451576027.225, "dur": 0.759, + "args": { + "External id": 460146,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451576038.447, "dur": 74.582, + "args": { + "External id": 460147,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451576040.787, "dur": 0.535, + "args": { + "External id": 460148,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451576043.202, "dur": 0.417, + "args": { + "External id": 460149,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451576045.230, "dur": 2.939, + "args": { + "External id": 460150,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451576050.162, "dur": 0.478, + "args": { + "External id": 460151,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451576095.147, "dur": 0.778, + "args": { + "External id": 460152,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451576098.623, "dur": 0.362, + "args": { + "External id": 460153,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451576101.317, "dur": 0.538, + "args": { + "External id": 460154,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451576103.727, "dur": 0.693, + "args": { + "External id": 460155,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451576106.568, "dur": 0.415, + "args": { + "External id": 460156,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451576128.437, "dur": 38.804, + "args": { + "External id": 460157,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451576241.306, "dur": 145.956, + "args": { + "External id": 460158,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451576272.849, "dur": 109.965, + "args": { + "External id": 460159,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2942, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451576285.088, "dur": 92.498, + "args": { + "External id": 460160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451576403.499, "dur": 2.225, + "args": { + "External id": 460161,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2944, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451576503.657, "dur": 2256.950, + "args": { + "External id": 460162,"Sequence number": 5036402, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2945 + } + }, + { + "ph": "f", "id": 188, "pid": 2338707, "tid": 2379456, "ts": 6267451576503.657, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451576663.306, "dur": 133.279, + "args": { + "External id": 460163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451576844.986, "dur": 52.386, + "args": { + "External id": 460164,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451576921.470, "dur": 62.169, + "args": { + "External id": 460165,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451576998.425, "dur": 38.102, + "args": { + "External id": 460166,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451577045.719, "dur": 87.427, + "args": { + "External id": 460167,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451577146.768, "dur": 38.201, + "args": { + "External id": 460168,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451577194.126, "dur": 38.354, + "args": { + "External id": 460169,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451577266.439, "dur": 34.614, + "args": { + "External id": 460170,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451577345.613, "dur": 35.310, + "args": { + "External id": 460171,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451577408.959, "dur": 23.721, + "args": { + "External id": 460172,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451577450.104, "dur": 18.471, + "args": { + "External id": 460173,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451577480.199, "dur": 47.347, + "args": { + "External id": 460174,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451577532.339, "dur": 39.709, + "args": { + "External id": 460175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451577655.371, "dur": 337.311, + "args": { + "External id": 460176,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451577763.454, "dur": 9.592, + "args": { + "External id": 460177,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451577776.500, "dur": 4.150, + "args": { + "External id": 460178,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451577791.246, "dur": 2.727, + "args": { + "External id": 460179,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451577795.582, "dur": 2.131, + "args": { + "External id": 460180,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451577861.009, "dur": 6.148, + "args": { + "External id": 460181,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451577863.387, "dur": 3.521, + "args": { + "External id": 460182,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451577870.071, "dur": 41.498, + "args": { + "External id": 460183,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451577877.223, "dur": 4.594, + "args": { + "External id": 460184,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451577913.484, "dur": 2.089, + "args": { + "External id": 460185,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451577914.798, "dur": 0.676, + "args": { + "External id": 460186,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451577917.330, "dur": 17.161, + "args": { + "External id": 460187,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451577919.360, "dur": 0.740, + "args": { + "External id": 460188,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451578041.843, "dur": 78.905, + "args": { + "External id": 460189,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451578144.798, "dur": 22.129, + "args": { + "External id": 460190,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451578179.645, "dur": 65.882, + "args": { + "External id": 460191,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451578254.822, "dur": 51.958, + "args": { + "External id": 460192,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451578320.422, "dur": 25.754, + "args": { + "External id": 460193,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451578353.989, "dur": 38.867, + "args": { + "External id": 460194,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451578402.718, "dur": 33.304, + "args": { + "External id": 460195,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451578445.427, "dur": 37.321, + "args": { + "External id": 460196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451578505.059, "dur": 29.376, + "args": { + "External id": 460197,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451578552.853, "dur": 31.327, + "args": { + "External id": 460198,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451578620.236, "dur": 25.999, + "args": { + "External id": 460199,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451578667.359, "dur": 20.176, + "args": { + "External id": 460200,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451578702.997, "dur": 21.263, + "args": { + "External id": 460201,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578819.684, "dur": 19.064, + "args": { + "External id": 460202,"Record function id": 0, "Ev Idx": 2985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578824.211, "dur": 13.366, + "args": { + "External id": 460203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578829.187, "dur": 7.308, + "args": { + "External id": 460204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578831.488, "dur": 4.858, + "args": { + "External id": 460205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578843.682, "dur": 5.925, + "args": { + "External id": 460206,"Record function id": 0, "Ev Idx": 2989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578845.385, "dur": 3.602, + "args": { + "External id": 460207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578846.352, "dur": 1.943, + "args": { + "External id": 460208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578847.132, "dur": 1.041, + "args": { + "External id": 460209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578853.782, "dur": 8.352, + "args": { + "External id": 460210,"Record function id": 0, "Ev Idx": 2993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578855.763, "dur": 5.837, + "args": { + "External id": 460211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578856.353, "dur": 4.694, + "args": { + "External id": 460212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578857.057, "dur": 3.909, + "args": { + "External id": 460213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578866.197, "dur": 4.864, + "args": { + "External id": 460214,"Record function id": 0, "Ev Idx": 2997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578867.560, "dur": 3.020, + "args": { + "External id": 460215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578868.470, "dur": 1.632, + "args": { + "External id": 460216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578869.133, "dur": 0.892, + "args": { + "External id": 460217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578878.043, "dur": 4.862, + "args": { + "External id": 460218,"Record function id": 0, "Ev Idx": 3001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578879.718, "dur": 2.691, + "args": { + "External id": 460219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578880.422, "dur": 1.447, + "args": { + "External id": 460220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578880.909, "dur": 0.884, + "args": { + "External id": 460221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578887.109, "dur": 4.828, + "args": { + "External id": 460222,"Record function id": 0, "Ev Idx": 3005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578888.728, "dur": 2.692, + "args": { + "External id": 460223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578889.265, "dur": 1.678, + "args": { + "External id": 460224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578889.828, "dur": 0.979, + "args": { + "External id": 460225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578896.013, "dur": 4.467, + "args": { + "External id": 460226,"Record function id": 0, "Ev Idx": 3009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578897.616, "dur": 2.358, + "args": { + "External id": 460227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578898.288, "dur": 1.212, + "args": { + "External id": 460228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578898.672, "dur": 0.730, + "args": { + "External id": 460229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578904.435, "dur": 4.061, + "args": { + "External id": 460230,"Record function id": 0, "Ev Idx": 3013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578905.863, "dur": 2.128, + "args": { + "External id": 460231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578906.418, "dur": 1.097, + "args": { + "External id": 460232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578906.733, "dur": 0.703, + "args": { + "External id": 460233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578912.593, "dur": 4.511, + "args": { + "External id": 460234,"Record function id": 0, "Ev Idx": 3017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451578914.173, "dur": 2.426, + "args": { + "External id": 460235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578914.725, "dur": 1.372, + "args": { + "External id": 460236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451578915.353, "dur": 0.638, + "args": { + "External id": 460237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451578921.846, "dur": 61881.515, + "args": { + "External id": 460238,"Record function id": 0, "Sequence number": 5036401, "Fwd thread id": 1, "Ev Idx": 3021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451578923.328, "dur": 61868.608, + "args": { + "External id": 460239,"Sequence number": 5036401, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3022 + } + }, + { + "ph": "f", "id": 189, "pid": 2338707, "tid": 2379456, "ts": 6267451578923.328, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2338707, "tid": 2379456, + "ts": 6267451578963.527, "dur": 47.573, + "args": { + "External id": 460240,"Record function id": 0, "Ev Idx": 3023 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2338707, "tid": 2379456, + "ts": 6267451579020.315, "dur": 124.506, + "args": { + "External id": 460241,"Record function id": 0, "Ev Idx": 3024 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2338707, "tid": 2379456, + "ts": 6267451579154.755, "dur": 61625.892, + "args": { + "External id": 460242,"Record function id": 0, "Ev Idx": 3025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451579270.112, "dur": 8.845, + "args": { + "External id": 460243,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451579291.936, "dur": 8.582, + "args": { + "External id": 460244,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451579318.995, "dur": 60401.449, + "args": { + "External id": 460245,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451579336.649, "dur": 60367.112, + "args": { + "External id": 460246,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451579473.741, "dur": 24.222, + "args": { + "External id": 460247,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451579526.934, "dur": 60119.232, + "args": { + "External id": 460248,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451579530.828, "dur": 60114.243, + "args": { + "External id": 460249,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451579536.746, "dur": 12.764, + "args": { + "External id": 460250,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451579552.215, "dur": 60089.281, + "args": { + "External id": 460251,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451639853.084, "dur": 14.796, + "args": { + "External id": 460252,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451639858.325, "dur": 9.113, + "args": { + "External id": 460253,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451639910.936, "dur": 473.675, + "args": { + "External id": 460254,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451639944.838, "dur": 432.995, + "args": { + "External id": 460255,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3038, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451639959.230, "dur": 410.992, + "args": { + "External id": 460256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451640411.234, "dur": 2.858, + "args": { + "External id": 460257,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3040, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451640491.719, "dur": 8.156, + "args": { + "External id": 460258,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451640556.901, "dur": 1.536, + "args": { + "External id": 460259,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451640579.299, "dur": 4.244, + "args": { + "External id": 460260,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451640620.056, "dur": 1.892, + "args": { + "External id": 460261,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451640637.698, "dur": 1.117, + "args": { + "External id": 460262,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451640653.127, "dur": 1.469, + "args": { + "External id": 460263,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451640667.658, "dur": 3.452, + "args": { + "External id": 460264,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451640684.941, "dur": 2.898, + "args": { + "External id": 460265,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451640700.926, "dur": 1.346, + "args": { + "External id": 460266,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451640822.940, "dur": 3763.625, + "args": { + "External id": 460267,"Record function id": 0, "Ev Idx": 3050 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2338707, "tid": 2379456, + "ts": 6267451640849.741, "dur": 1465.648, + "args": { + "External id": 460268,"Record function id": 0, "Ev Idx": 3051 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338707, "tid": 2379456, + "ts": 6267451640868.762, "dur": 475.629, + "args": { + "External id": 460269,"Record function id": 0, "Ev Idx": 3052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451640976.106, "dur": 5.043, + "args": { + "External id": 460270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451640984.796, "dur": 1.274, + "args": { + "External id": 460271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451640988.228, "dur": 3.777, + "args": { + "External id": 460272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451640994.035, "dur": 1.063, + "args": { + "External id": 460273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451640996.858, "dur": 1.023, + "args": { + "External id": 460274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451640999.881, "dur": 1.007, + "args": { + "External id": 460275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451641002.701, "dur": 3.111, + "args": { + "External id": 460276,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451641007.391, "dur": 0.932, + "args": { + "External id": 460277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451641009.881, "dur": 1.010, + "args": { + "External id": 460278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451641012.722, "dur": 1.100, + "args": { + "External id": 460279,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451641050.308, "dur": 251.703, + "args": { + "External id": 460280,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451641111.818, "dur": 183.471, + "args": { + "External id": 460281,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451641142.949, "dur": 20.759, + "args": { + "External id": 460282,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451641168.704, "dur": 87.872, + "args": { + "External id": 460283,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451641171.986, "dur": 84.116, + "args": { + "External id": 460284,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641177.229, "dur": 6.774, + "args": { + "External id": 460285,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451641186.452, "dur": 68.863, + "args": { + "External id": 460286,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2338707, "tid": 2379456, + "ts": 6267451641457.136, "dur": 847.641, + "args": { + "External id": 460287,"Record function id": 0, "Ev Idx": 3070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338707, "tid": 2379456, + "ts": 6267451641481.301, "dur": 806.376, + "args": { + "External id": 460288,"Record function id": 0, "Ev Idx": 3071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451641618.698, "dur": 10.369, + "args": { + "External id": 460289,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451641653.940, "dur": 39.337, + "args": { + "External id": 460290,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641661.189, "dur": 2.171, + "args": { + "External id": 460291,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641665.695, "dur": 0.805, + "args": { + "External id": 460292,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641668.735, "dur": 0.597, + "args": { + "External id": 460293,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641671.404, "dur": 0.540, + "args": { + "External id": 460294,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641674.204, "dur": 0.510, + "args": { + "External id": 460295,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641676.530, "dur": 3.008, + "args": { + "External id": 460296,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641681.364, "dur": 0.726, + "args": { + "External id": 460297,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641683.776, "dur": 0.808, + "args": { + "External id": 460298,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641686.284, "dur": 0.414, + "args": { + "External id": 460299,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451641706.190, "dur": 58.409, + "args": { + "External id": 460300,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451641805.615, "dur": 140.495, + "args": { + "External id": 460301,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451641819.602, "dur": 3.955, + "args": { + "External id": 460302,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451641830.004, "dur": 12.808, + "args": { + "External id": 460303,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451641835.085, "dur": 7.175, + "args": { + "External id": 460304,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641839.809, "dur": 0.855, + "args": { + "External id": 460305,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451641851.040, "dur": 29.399, + "args": { + "External id": 460306,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641853.541, "dur": 0.696, + "args": { + "External id": 460307,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641856.304, "dur": 0.536, + "args": { + "External id": 460308,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641858.710, "dur": 2.399, + "args": { + "External id": 460309,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641863.082, "dur": 0.490, + "args": { + "External id": 460310,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641865.298, "dur": 0.645, + "args": { + "External id": 460311,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641867.915, "dur": 0.437, + "args": { + "External id": 460312,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641870.259, "dur": 0.381, + "args": { + "External id": 460313,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641872.329, "dur": 0.502, + "args": { + "External id": 460314,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451641874.444, "dur": 0.550, + "args": { + "External id": 460315,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451641896.967, "dur": 39.380, + "args": { + "External id": 460316,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451642007.896, "dur": 186.111, + "args": { + "External id": 460317,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451642034.853, "dur": 153.505, + "args": { + "External id": 460318,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3101, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451642047.302, "dur": 135.789, + "args": { + "External id": 460319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451642213.611, "dur": 2.217, + "args": { + "External id": 460320,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3103, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451642325.206, "dur": 2233.480, + "args": { + "External id": 460321,"Sequence number": 5036400, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3104 + } + }, + { + "ph": "f", "id": 190, "pid": 2338707, "tid": 2379456, "ts": 6267451642325.206, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451642464.745, "dur": 154.364, + "args": { + "External id": 460322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451642691.714, "dur": 50.483, + "args": { + "External id": 460323,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451642766.511, "dur": 70.832, + "args": { + "External id": 460324,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451642852.490, "dur": 41.103, + "args": { + "External id": 460325,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451642902.782, "dur": 43.033, + "args": { + "External id": 460326,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451642955.157, "dur": 35.518, + "args": { + "External id": 460327,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451643000.593, "dur": 38.587, + "args": { + "External id": 460328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451643112.252, "dur": 34.129, + "args": { + "External id": 460329,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451643169.327, "dur": 40.705, + "args": { + "External id": 460330,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451643236.653, "dur": 27.697, + "args": { + "External id": 460331,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451643279.503, "dur": 19.924, + "args": { + "External id": 460332,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451643313.483, "dur": 52.587, + "args": { + "External id": 460333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451643373.950, "dur": 42.505, + "args": { + "External id": 460334,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451643470.650, "dur": 360.457, + "args": { + "External id": 460335,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451643573.558, "dur": 8.776, + "args": { + "External id": 460336,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451643585.075, "dur": 3.132, + "args": { + "External id": 460337,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451643590.264, "dur": 24.105, + "args": { + "External id": 460338,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451643617.403, "dur": 2.961, + "args": { + "External id": 460339,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451643684.471, "dur": 6.620, + "args": { + "External id": 460340,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451643687.302, "dur": 3.560, + "args": { + "External id": 460341,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451643693.724, "dur": 52.310, + "args": { + "External id": 460342,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451643709.155, "dur": 4.836, + "args": { + "External id": 460343,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451643748.320, "dur": 2.146, + "args": { + "External id": 460344,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451643749.568, "dur": 0.805, + "args": { + "External id": 460345,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451643752.017, "dur": 23.165, + "args": { + "External id": 460346,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451643754.568, "dur": 0.716, + "args": { + "External id": 460347,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451643874.878, "dur": 35.598, + "args": { + "External id": 460348,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451643931.243, "dur": 21.663, + "args": { + "External id": 460349,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451643963.782, "dur": 59.157, + "args": { + "External id": 460350,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451644032.070, "dur": 91.061, + "args": { + "External id": 460351,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451644143.352, "dur": 30.817, + "args": { + "External id": 460352,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451644182.505, "dur": 39.661, + "args": { + "External id": 460353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451644233.105, "dur": 34.517, + "args": { + "External id": 460354,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451644277.064, "dur": 37.382, + "args": { + "External id": 460355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451644338.494, "dur": 29.137, + "args": { + "External id": 460356,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451644385.998, "dur": 28.070, + "args": { + "External id": 460357,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451644430.566, "dur": 21.944, + "args": { + "External id": 460358,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451644471.378, "dur": 18.172, + "args": { + "External id": 460359,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451644504.005, "dur": 19.305, + "args": { + "External id": 460360,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644633.698, "dur": 19.485, + "args": { + "External id": 460361,"Record function id": 0, "Ev Idx": 3144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644638.412, "dur": 13.258, + "args": { + "External id": 460362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644643.188, "dur": 7.060, + "args": { + "External id": 460363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644645.195, "dur": 4.760, + "args": { + "External id": 460364,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644658.212, "dur": 36.746, + "args": { + "External id": 460365,"Record function id": 0, "Ev Idx": 3148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644660.397, "dur": 33.980, + "args": { + "External id": 460366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644661.526, "dur": 32.107, + "args": { + "External id": 460367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644692.032, "dur": 1.473, + "args": { + "External id": 460368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644699.519, "dur": 8.652, + "args": { + "External id": 460369,"Record function id": 0, "Ev Idx": 3152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644701.425, "dur": 6.209, + "args": { + "External id": 460370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644702.369, "dur": 4.767, + "args": { + "External id": 460371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644703.192, "dur": 3.867, + "args": { + "External id": 460372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644712.292, "dur": 5.011, + "args": { + "External id": 460373,"Record function id": 0, "Ev Idx": 3156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644713.973, "dur": 2.848, + "args": { + "External id": 460374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644714.703, "dur": 1.646, + "args": { + "External id": 460375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644715.206, "dur": 1.059, + "args": { + "External id": 460376,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644721.226, "dur": 5.410, + "args": { + "External id": 460377,"Record function id": 0, "Ev Idx": 3160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644722.825, "dur": 3.293, + "args": { + "External id": 460378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644723.741, "dur": 1.873, + "args": { + "External id": 460379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644724.613, "dur": 0.921, + "args": { + "External id": 460380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644730.539, "dur": 5.708, + "args": { + "External id": 460381,"Record function id": 0, "Ev Idx": 3164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644732.473, "dur": 3.288, + "args": { + "External id": 460382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644733.418, "dur": 1.881, + "args": { + "External id": 460383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644734.093, "dur": 1.106, + "args": { + "External id": 460384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644740.152, "dur": 7.490, + "args": { + "External id": 460385,"Record function id": 0, "Ev Idx": 3168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644741.754, "dur": 5.411, + "args": { + "External id": 460386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644743.046, "dur": 3.527, + "args": { + "External id": 460387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644745.472, "dur": 1.020, + "args": { + "External id": 460388,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644751.575, "dur": 4.698, + "args": { + "External id": 460389,"Record function id": 0, "Ev Idx": 3172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644753.161, "dur": 2.617, + "args": { + "External id": 460390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644753.933, "dur": 1.326, + "args": { + "External id": 460391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644754.491, "dur": 0.688, + "args": { + "External id": 460392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644760.231, "dur": 4.242, + "args": { + "External id": 460393,"Record function id": 0, "Ev Idx": 3176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451644761.489, "dur": 2.499, + "args": { + "External id": 460394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644762.430, "dur": 1.086, + "args": { + "External id": 460395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451644762.745, "dur": 0.690, + "args": { + "External id": 460396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451644769.299, "dur": 62551.331, + "args": { + "External id": 460397,"Record function id": 0, "Sequence number": 5036399, "Fwd thread id": 1, "Ev Idx": 3180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451644771.103, "dur": 62537.122, + "args": { + "External id": 460398,"Sequence number": 5036399, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3181 + } + }, + { + "ph": "f", "id": 191, "pid": 2338707, "tid": 2379456, "ts": 6267451644771.103, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2338707, "tid": 2379456, + "ts": 6267451644812.940, "dur": 45.491, + "args": { + "External id": 460399,"Record function id": 0, "Ev Idx": 3182 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2338707, "tid": 2379456, + "ts": 6267451644867.549, "dur": 82.743, + "args": { + "External id": 460400,"Record function id": 0, "Ev Idx": 3183 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2338707, "tid": 2379456, + "ts": 6267451644958.614, "dur": 62339.800, + "args": { + "External id": 460401,"Record function id": 0, "Ev Idx": 3184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451645113.038, "dur": 10.318, + "args": { + "External id": 460402,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451645137.585, "dur": 8.496, + "args": { + "External id": 460403,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451645167.298, "dur": 61018.323, + "args": { + "External id": 460404,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451645196.425, "dur": 60973.396, + "args": { + "External id": 460405,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451645316.711, "dur": 22.349, + "args": { + "External id": 460406,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451645363.734, "dur": 60746.620, + "args": { + "External id": 460407,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451645367.378, "dur": 60741.696, + "args": { + "External id": 460408,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451645372.829, "dur": 11.503, + "args": { + "External id": 460409,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451645386.686, "dur": 60716.516, + "args": { + "External id": 460410,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451706314.895, "dur": 14.545, + "args": { + "External id": 460411,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451706319.537, "dur": 9.376, + "args": { + "External id": 460412,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451706367.607, "dur": 503.571, + "args": { + "External id": 460413,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451706401.340, "dur": 462.682, + "args": { + "External id": 460414,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3197, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451706415.378, "dur": 436.057, + "args": { + "External id": 460415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451706897.230, "dur": 2.853, + "args": { + "External id": 460416,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3199, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451706983.541, "dur": 7.787, + "args": { + "External id": 460417,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451707092.717, "dur": 2.921, + "args": { + "External id": 460418,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451707121.634, "dur": 4.059, + "args": { + "External id": 460419,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451707141.176, "dur": 1.378, + "args": { + "External id": 460420,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451707155.599, "dur": 1.124, + "args": { + "External id": 460421,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451707170.359, "dur": 1.525, + "args": { + "External id": 460422,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451707185.451, "dur": 3.775, + "args": { + "External id": 460423,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451707202.521, "dur": 2.591, + "args": { + "External id": 460424,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451707217.925, "dur": 1.125, + "args": { + "External id": 460425,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451707340.198, "dur": 3662.772, + "args": { + "External id": 460426,"Record function id": 0, "Ev Idx": 3209 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2338707, "tid": 2379456, + "ts": 6267451707365.304, "dur": 1377.505, + "args": { + "External id": 460427,"Record function id": 0, "Ev Idx": 3210 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338707, "tid": 2379456, + "ts": 6267451707386.857, "dur": 456.418, + "args": { + "External id": 460428,"Record function id": 0, "Ev Idx": 3211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451707502.011, "dur": 4.786, + "args": { + "External id": 460429,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451707510.604, "dur": 1.414, + "args": { + "External id": 460430,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451707514.312, "dur": 3.291, + "args": { + "External id": 460431,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451707519.515, "dur": 0.943, + "args": { + "External id": 460432,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451707522.114, "dur": 1.027, + "args": { + "External id": 460433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451707525.048, "dur": 0.973, + "args": { + "External id": 460434,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451707527.828, "dur": 2.590, + "args": { + "External id": 460435,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451707532.231, "dur": 1.162, + "args": { + "External id": 460436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451707534.862, "dur": 1.326, + "args": { + "External id": 460437,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451707537.864, "dur": 0.944, + "args": { + "External id": 460438,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451707559.790, "dur": 239.709, + "args": { + "External id": 460439,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451707609.090, "dur": 183.749, + "args": { + "External id": 460440,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451707642.606, "dur": 20.108, + "args": { + "External id": 460441,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451707667.796, "dur": 87.909, + "args": { + "External id": 460442,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451707671.237, "dur": 83.946, + "args": { + "External id": 460443,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451707676.598, "dur": 7.219, + "args": { + "External id": 460444,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451707686.278, "dur": 68.260, + "args": { + "External id": 460445,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3228 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2338707, "tid": 2379456, + "ts": 6267451707957.464, "dur": 775.237, + "args": { + "External id": 460446,"Record function id": 0, "Ev Idx": 3229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338707, "tid": 2379456, + "ts": 6267451707978.874, "dur": 738.284, + "args": { + "External id": 460447,"Record function id": 0, "Ev Idx": 3230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451708099.502, "dur": 8.563, + "args": { + "External id": 460448,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451708128.428, "dur": 40.080, + "args": { + "External id": 460449,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708135.043, "dur": 1.990, + "args": { + "External id": 460450,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708140.011, "dur": 0.544, + "args": { + "External id": 460451,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708142.849, "dur": 0.527, + "args": { + "External id": 460452,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708146.013, "dur": 0.582, + "args": { + "External id": 460453,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708148.991, "dur": 0.717, + "args": { + "External id": 460454,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708151.901, "dur": 2.644, + "args": { + "External id": 460455,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708156.320, "dur": 0.598, + "args": { + "External id": 460456,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708158.981, "dur": 0.420, + "args": { + "External id": 460457,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708161.141, "dur": 0.625, + "args": { + "External id": 460458,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451708181.604, "dur": 59.000, + "args": { + "External id": 460459,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451708280.511, "dur": 135.120, + "args": { + "External id": 460460,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451708293.984, "dur": 3.789, + "args": { + "External id": 460461,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451708304.389, "dur": 12.871, + "args": { + "External id": 460462,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451708309.912, "dur": 6.818, + "args": { + "External id": 460463,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708314.365, "dur": 0.768, + "args": { + "External id": 460464,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451708325.304, "dur": 31.351, + "args": { + "External id": 460465,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708327.856, "dur": 0.540, + "args": { + "External id": 460466,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708330.471, "dur": 0.532, + "args": { + "External id": 460467,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708333.137, "dur": 2.838, + "args": { + "External id": 460468,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708337.947, "dur": 0.481, + "args": { + "External id": 460469,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708340.177, "dur": 0.423, + "args": { + "External id": 460470,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708342.684, "dur": 0.492, + "args": { + "External id": 460471,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708345.055, "dur": 0.466, + "args": { + "External id": 460472,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708347.454, "dur": 0.505, + "args": { + "External id": 460473,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451708349.819, "dur": 0.498, + "args": { + "External id": 460474,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451708369.440, "dur": 36.707, + "args": { + "External id": 460475,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451708472.120, "dur": 153.294, + "args": { + "External id": 460476,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451708498.338, "dur": 121.563, + "args": { + "External id": 460477,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3260, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451708510.819, "dur": 103.055, + "args": { + "External id": 460478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451708646.465, "dur": 2.923, + "args": { + "External id": 460479,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3262, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451708752.092, "dur": 2223.134, + "args": { + "External id": 460480,"Sequence number": 5036398, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3263 + } + }, + { + "ph": "f", "id": 192, "pid": 2338707, "tid": 2379456, "ts": 6267451708752.092, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451708890.524, "dur": 138.491, + "args": { + "External id": 460481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451709119.416, "dur": 53.449, + "args": { + "External id": 460482,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451709200.109, "dur": 78.741, + "args": { + "External id": 460483,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451709293.632, "dur": 43.954, + "args": { + "External id": 460484,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451709346.680, "dur": 46.533, + "args": { + "External id": 460485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451709401.961, "dur": 38.386, + "args": { + "External id": 460486,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451709449.000, "dur": 36.560, + "args": { + "External id": 460487,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451709516.761, "dur": 32.209, + "args": { + "External id": 460488,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451709570.149, "dur": 55.238, + "args": { + "External id": 460489,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451709654.289, "dur": 25.766, + "args": { + "External id": 460490,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451709696.357, "dur": 16.777, + "args": { + "External id": 460491,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451709724.790, "dur": 52.886, + "args": { + "External id": 460492,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451709782.335, "dur": 41.567, + "args": { + "External id": 460493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451709880.260, "dur": 364.552, + "args": { + "External id": 460494,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451709986.455, "dur": 7.521, + "args": { + "External id": 460495,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451709997.200, "dur": 2.970, + "args": { + "External id": 460496,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451710001.711, "dur": 1.910, + "args": { + "External id": 460497,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451710005.101, "dur": 2.125, + "args": { + "External id": 460498,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451710109.402, "dur": 6.896, + "args": { + "External id": 460499,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451710111.885, "dur": 3.973, + "args": { + "External id": 460500,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451710118.750, "dur": 43.634, + "args": { + "External id": 460501,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451710125.905, "dur": 5.245, + "args": { + "External id": 460502,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451710164.375, "dur": 2.541, + "args": { + "External id": 460503,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451710165.980, "dur": 0.779, + "args": { + "External id": 460504,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451710168.844, "dur": 17.965, + "args": { + "External id": 460505,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451710170.769, "dur": 0.713, + "args": { + "External id": 460506,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451710290.478, "dur": 33.178, + "args": { + "External id": 460507,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451710343.001, "dur": 21.325, + "args": { + "External id": 460508,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451710375.848, "dur": 61.942, + "args": { + "External id": 460509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451710446.980, "dur": 52.020, + "args": { + "External id": 460510,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451710512.821, "dur": 30.941, + "args": { + "External id": 460511,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451710551.253, "dur": 61.296, + "args": { + "External id": 460512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451710626.298, "dur": 40.645, + "args": { + "External id": 460513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451710676.425, "dur": 39.088, + "args": { + "External id": 460514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451710738.971, "dur": 29.622, + "args": { + "External id": 460515,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451710786.495, "dur": 31.164, + "args": { + "External id": 460516,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451710834.816, "dur": 22.923, + "args": { + "External id": 460517,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451710876.084, "dur": 18.983, + "args": { + "External id": 460518,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451710913.713, "dur": 20.975, + "args": { + "External id": 460519,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711030.537, "dur": 20.740, + "args": { + "External id": 460520,"Record function id": 0, "Ev Idx": 3303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711037.816, "dur": 12.250, + "args": { + "External id": 460521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711042.343, "dur": 6.668, + "args": { + "External id": 460522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711044.457, "dur": 4.373, + "args": { + "External id": 460523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711100.488, "dur": 11.487, + "args": { + "External id": 460524,"Record function id": 0, "Ev Idx": 3307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711103.751, "dur": 7.335, + "args": { + "External id": 460525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711105.995, "dur": 3.801, + "args": { + "External id": 460526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711107.401, "dur": 2.071, + "args": { + "External id": 460527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711117.533, "dur": 8.102, + "args": { + "External id": 460528,"Record function id": 0, "Ev Idx": 3311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711118.884, "dur": 6.226, + "args": { + "External id": 460529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711120.017, "dur": 4.467, + "args": { + "External id": 460530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711120.378, "dur": 4.030, + "args": { + "External id": 460531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711130.115, "dur": 5.223, + "args": { + "External id": 460532,"Record function id": 0, "Ev Idx": 3315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711131.867, "dur": 2.997, + "args": { + "External id": 460533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711133.039, "dur": 1.310, + "args": { + "External id": 460534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711133.433, "dur": 0.835, + "args": { + "External id": 460535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711139.277, "dur": 4.377, + "args": { + "External id": 460536,"Record function id": 0, "Ev Idx": 3319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711140.785, "dur": 2.365, + "args": { + "External id": 460537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711141.450, "dur": 1.166, + "args": { + "External id": 460538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711141.762, "dur": 0.774, + "args": { + "External id": 460539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711147.628, "dur": 5.186, + "args": { + "External id": 460540,"Record function id": 0, "Ev Idx": 3323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711149.128, "dur": 3.206, + "args": { + "External id": 460541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711150.177, "dur": 1.659, + "args": { + "External id": 460542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711150.980, "dur": 0.746, + "args": { + "External id": 460543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711156.674, "dur": 4.728, + "args": { + "External id": 460544,"Record function id": 0, "Ev Idx": 3327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711157.996, "dur": 2.925, + "args": { + "External id": 460545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711158.688, "dur": 1.587, + "args": { + "External id": 460546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711159.381, "dur": 0.781, + "args": { + "External id": 460547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711165.182, "dur": 4.527, + "args": { + "External id": 460548,"Record function id": 0, "Ev Idx": 3331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711166.661, "dur": 2.552, + "args": { + "External id": 460549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711167.647, "dur": 1.080, + "args": { + "External id": 460550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711167.955, "dur": 0.693, + "args": { + "External id": 460551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711173.562, "dur": 4.604, + "args": { + "External id": 460552,"Record function id": 0, "Ev Idx": 3335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451711174.873, "dur": 2.793, + "args": { + "External id": 460553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711175.499, "dur": 1.597, + "args": { + "External id": 460554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451711176.286, "dur": 0.705, + "args": { + "External id": 460555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451711183.365, "dur": 59781.047, + "args": { + "External id": 460556,"Record function id": 0, "Sequence number": 5036397, "Fwd thread id": 1, "Ev Idx": 3339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451711184.915, "dur": 59764.843, + "args": { + "External id": 460557,"Sequence number": 5036397, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3340 + } + }, + { + "ph": "f", "id": 193, "pid": 2338707, "tid": 2379456, "ts": 6267451711184.915, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2338707, "tid": 2379456, + "ts": 6267451711224.500, "dur": 47.616, + "args": { + "External id": 460558,"Record function id": 0, "Ev Idx": 3341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2338707, "tid": 2379456, + "ts": 6267451711281.291, "dur": 78.878, + "args": { + "External id": 460559,"Record function id": 0, "Ev Idx": 3342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2338707, "tid": 2379456, + "ts": 6267451711367.089, "dur": 59572.509, + "args": { + "External id": 460560,"Record function id": 0, "Ev Idx": 3343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451711480.514, "dur": 8.461, + "args": { + "External id": 460561,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451711500.898, "dur": 7.480, + "args": { + "External id": 460562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451711526.403, "dur": 58373.804, + "args": { + "External id": 460563,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451711543.240, "dur": 58340.274, + "args": { + "External id": 460564,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451711743.385, "dur": 27.351, + "args": { + "External id": 460565,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451711797.224, "dur": 58031.258, + "args": { + "External id": 460566,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451711800.952, "dur": 58026.621, + "args": { + "External id": 460567,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451711806.450, "dur": 14.400, + "args": { + "External id": 460568,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451711823.551, "dur": 58000.397, + "args": { + "External id": 460569,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451770028.953, "dur": 14.491, + "args": { + "External id": 460570,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451770033.729, "dur": 9.060, + "args": { + "External id": 460571,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451770112.151, "dur": 434.936, + "args": { + "External id": 460572,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451770146.257, "dur": 393.947, + "args": { + "External id": 460573,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3356, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451770161.310, "dur": 372.031, + "args": { + "External id": 460574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451770570.280, "dur": 2.662, + "args": { + "External id": 460575,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3358, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451770669.644, "dur": 8.138, + "args": { + "External id": 460576,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451770734.158, "dur": 1.513, + "args": { + "External id": 460577,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451770754.485, "dur": 4.306, + "args": { + "External id": 460578,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451770773.438, "dur": 1.308, + "args": { + "External id": 460579,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451770787.408, "dur": 1.306, + "args": { + "External id": 460580,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451770802.137, "dur": 1.299, + "args": { + "External id": 460581,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451770815.801, "dur": 3.967, + "args": { + "External id": 460582,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451770843.098, "dur": 3.399, + "args": { + "External id": 460583,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451770860.651, "dur": 1.215, + "args": { + "External id": 460584,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451770984.219, "dur": 3697.161, + "args": { + "External id": 460585,"Record function id": 0, "Ev Idx": 3368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2338707, "tid": 2379456, + "ts": 6267451771007.645, "dur": 1400.374, + "args": { + "External id": 460586,"Record function id": 0, "Ev Idx": 3369 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338707, "tid": 2379456, + "ts": 6267451771029.748, "dur": 469.086, + "args": { + "External id": 460587,"Record function id": 0, "Ev Idx": 3370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451771187.325, "dur": 6.241, + "args": { + "External id": 460588,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451771197.834, "dur": 1.405, + "args": { + "External id": 460589,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451771201.276, "dur": 3.570, + "args": { + "External id": 460590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451771206.646, "dur": 1.107, + "args": { + "External id": 460591,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451771210.106, "dur": 1.658, + "args": { + "External id": 460592,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451771213.836, "dur": 1.222, + "args": { + "External id": 460593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451771216.594, "dur": 3.092, + "args": { + "External id": 460594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451771221.536, "dur": 1.184, + "args": { + "External id": 460595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451771224.548, "dur": 0.971, + "args": { + "External id": 460596,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451771227.320, "dur": 1.153, + "args": { + "External id": 460597,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451771250.618, "dur": 205.388, + "args": { + "External id": 460598,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451771270.666, "dur": 179.751, + "args": { + "External id": 460599,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451771302.143, "dur": 19.972, + "args": { + "External id": 460600,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451771326.753, "dur": 84.740, + "args": { + "External id": 460601,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451771330.143, "dur": 80.881, + "args": { + "External id": 460602,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771335.173, "dur": 6.347, + "args": { + "External id": 460603,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451771344.239, "dur": 65.957, + "args": { + "External id": 460604,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2338707, "tid": 2379456, + "ts": 6267451771637.092, "dur": 760.839, + "args": { + "External id": 460605,"Record function id": 0, "Ev Idx": 3388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338707, "tid": 2379456, + "ts": 6267451771663.938, "dur": 718.611, + "args": { + "External id": 460606,"Record function id": 0, "Ev Idx": 3389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451771746.896, "dur": 7.292, + "args": { + "External id": 460607,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451771772.692, "dur": 42.399, + "args": { + "External id": 460608,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771784.454, "dur": 2.040, + "args": { + "External id": 460609,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771789.080, "dur": 0.698, + "args": { + "External id": 460610,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771791.774, "dur": 0.791, + "args": { + "External id": 460611,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771794.215, "dur": 0.504, + "args": { + "External id": 460612,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771796.541, "dur": 0.388, + "args": { + "External id": 460613,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771798.825, "dur": 3.087, + "args": { + "External id": 460614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771803.820, "dur": 0.520, + "args": { + "External id": 460615,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771806.332, "dur": 0.477, + "args": { + "External id": 460616,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771808.311, "dur": 0.356, + "args": { + "External id": 460617,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451771827.162, "dur": 56.854, + "args": { + "External id": 460618,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451771924.813, "dur": 173.466, + "args": { + "External id": 460619,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451771937.660, "dur": 3.767, + "args": { + "External id": 460620,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451771947.967, "dur": 12.881, + "args": { + "External id": 460621,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451771953.702, "dur": 6.626, + "args": { + "External id": 460622,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771958.040, "dur": 0.613, + "args": { + "External id": 460623,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451771968.820, "dur": 29.585, + "args": { + "External id": 460624,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771971.288, "dur": 0.545, + "args": { + "External id": 460625,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771974.083, "dur": 0.536, + "args": { + "External id": 460626,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771976.221, "dur": 3.087, + "args": { + "External id": 460627,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771980.813, "dur": 0.531, + "args": { + "External id": 460628,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771983.121, "dur": 0.482, + "args": { + "External id": 460629,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771985.593, "dur": 0.452, + "args": { + "External id": 460630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771987.783, "dur": 0.417, + "args": { + "External id": 460631,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771989.755, "dur": 0.380, + "args": { + "External id": 460632,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451771992.130, "dur": 0.878, + "args": { + "External id": 460633,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451772012.648, "dur": 34.666, + "args": { + "External id": 460634,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451772160.483, "dur": 138.979, + "args": { + "External id": 460635,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451772187.536, "dur": 107.167, + "args": { + "External id": 460636,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3419, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451772200.108, "dur": 89.372, + "args": { + "External id": 460637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451772316.334, "dur": 2.174, + "args": { + "External id": 460638,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3421, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451772418.067, "dur": 2237.121, + "args": { + "External id": 460639,"Sequence number": 5036396, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3422 + } + }, + { + "ph": "f", "id": 194, "pid": 2338707, "tid": 2379456, "ts": 6267451772418.067, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451772555.759, "dur": 155.493, + "args": { + "External id": 460640,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451772761.893, "dur": 51.411, + "args": { + "External id": 460641,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451772838.340, "dur": 66.371, + "args": { + "External id": 460642,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451772918.200, "dur": 39.943, + "args": { + "External id": 460643,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451772967.083, "dur": 43.224, + "args": { + "External id": 460644,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451773019.406, "dur": 35.425, + "args": { + "External id": 460645,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451773111.255, "dur": 46.053, + "args": { + "External id": 460646,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451773191.039, "dur": 31.529, + "args": { + "External id": 460647,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451773244.430, "dur": 35.002, + "args": { + "External id": 460648,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451773303.725, "dur": 24.757, + "args": { + "External id": 460649,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451773343.251, "dur": 18.280, + "args": { + "External id": 460650,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451773372.186, "dur": 52.355, + "args": { + "External id": 460651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451773429.227, "dur": 43.546, + "args": { + "External id": 460652,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451773515.293, "dur": 384.313, + "args": { + "External id": 460653,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451773665.446, "dur": 10.354, + "args": { + "External id": 460654,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451773679.153, "dur": 3.947, + "args": { + "External id": 460655,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451773684.709, "dur": 2.331, + "args": { + "External id": 460656,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451773688.799, "dur": 6.150, + "args": { + "External id": 460657,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451773762.042, "dur": 6.265, + "args": { + "External id": 460658,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451773764.410, "dur": 3.663, + "args": { + "External id": 460659,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451773770.814, "dur": 44.061, + "args": { + "External id": 460660,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451773777.907, "dur": 4.653, + "args": { + "External id": 460661,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451773816.842, "dur": 2.558, + "args": { + "External id": 460662,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451773818.414, "dur": 0.898, + "args": { + "External id": 460663,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451773820.807, "dur": 19.170, + "args": { + "External id": 460664,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451773822.813, "dur": 0.701, + "args": { + "External id": 460665,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451773941.695, "dur": 36.048, + "args": { + "External id": 460666,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451773997.722, "dur": 22.379, + "args": { + "External id": 460667,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451774030.491, "dur": 104.241, + "args": { + "External id": 460668,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451774147.952, "dur": 55.958, + "args": { + "External id": 460669,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451774218.860, "dur": 28.102, + "args": { + "External id": 460670,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451774254.478, "dur": 38.604, + "args": { + "External id": 460671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451774302.760, "dur": 34.034, + "args": { + "External id": 460672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451774346.647, "dur": 35.790, + "args": { + "External id": 460673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451774406.790, "dur": 33.284, + "args": { + "External id": 460674,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451774459.597, "dur": 30.256, + "args": { + "External id": 460675,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451774506.633, "dur": 20.537, + "args": { + "External id": 460676,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451774546.724, "dur": 16.571, + "args": { + "External id": 460677,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451774577.394, "dur": 37.805, + "args": { + "External id": 460678,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774708.988, "dur": 18.047, + "args": { + "External id": 460679,"Record function id": 0, "Ev Idx": 3462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774713.029, "dur": 12.857, + "args": { + "External id": 460680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774718.088, "dur": 6.641, + "args": { + "External id": 460681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774720.025, "dur": 4.555, + "args": { + "External id": 460682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774731.924, "dur": 7.176, + "args": { + "External id": 460683,"Record function id": 0, "Ev Idx": 3466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774733.824, "dur": 4.616, + "args": { + "External id": 460684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774734.806, "dur": 2.964, + "args": { + "External id": 460685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774735.820, "dur": 1.795, + "args": { + "External id": 460686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774743.093, "dur": 7.360, + "args": { + "External id": 460687,"Record function id": 0, "Ev Idx": 3470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774744.592, "dur": 5.351, + "args": { + "External id": 460688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774745.155, "dur": 4.256, + "args": { + "External id": 460689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774745.652, "dur": 3.639, + "args": { + "External id": 460690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774754.473, "dur": 4.826, + "args": { + "External id": 460691,"Record function id": 0, "Ev Idx": 3474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774756.026, "dur": 2.762, + "args": { + "External id": 460692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774756.686, "dur": 1.345, + "args": { + "External id": 460693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774757.053, "dur": 0.880, + "args": { + "External id": 460694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774763.503, "dur": 4.525, + "args": { + "External id": 460695,"Record function id": 0, "Ev Idx": 3478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774765.009, "dur": 2.486, + "args": { + "External id": 460696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774765.540, "dur": 1.260, + "args": { + "External id": 460697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774765.941, "dur": 0.775, + "args": { + "External id": 460698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774772.096, "dur": 4.778, + "args": { + "External id": 460699,"Record function id": 0, "Ev Idx": 3482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774773.678, "dur": 2.685, + "args": { + "External id": 460700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774774.483, "dur": 1.298, + "args": { + "External id": 460701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774774.998, "dur": 0.661, + "args": { + "External id": 460702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774780.990, "dur": 4.181, + "args": { + "External id": 460703,"Record function id": 0, "Ev Idx": 3486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774782.366, "dur": 2.290, + "args": { + "External id": 460704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774782.985, "dur": 1.170, + "args": { + "External id": 460705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774783.325, "dur": 0.731, + "args": { + "External id": 460706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774789.136, "dur": 4.437, + "args": { + "External id": 460707,"Record function id": 0, "Ev Idx": 3490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774790.407, "dur": 2.661, + "args": { + "External id": 460708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774791.068, "dur": 1.504, + "args": { + "External id": 460709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774791.717, "dur": 0.776, + "args": { + "External id": 460710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774797.610, "dur": 4.514, + "args": { + "External id": 460711,"Record function id": 0, "Ev Idx": 3494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451774798.979, "dur": 2.646, + "args": { + "External id": 460712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774799.633, "dur": 1.359, + "args": { + "External id": 460713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451774800.236, "dur": 0.648, + "args": { + "External id": 460714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451774807.012, "dur": 61153.431, + "args": { + "External id": 460715,"Record function id": 0, "Sequence number": 5036395, "Fwd thread id": 1, "Ev Idx": 3498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451774808.459, "dur": 61141.477, + "args": { + "External id": 460716,"Sequence number": 5036395, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3499 + } + }, + { + "ph": "f", "id": 195, "pid": 2338707, "tid": 2379456, "ts": 6267451774808.459, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2338707, "tid": 2379456, + "ts": 6267451774845.860, "dur": 45.512, + "args": { + "External id": 460717,"Record function id": 0, "Ev Idx": 3500 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2338707, "tid": 2379456, + "ts": 6267451774900.871, "dur": 74.679, + "args": { + "External id": 460718,"Record function id": 0, "Ev Idx": 3501 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2338707, "tid": 2379456, + "ts": 6267451774981.980, "dur": 60958.231, + "args": { + "External id": 460719,"Record function id": 0, "Ev Idx": 3502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451775140.980, "dur": 9.756, + "args": { + "External id": 460720,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451775167.850, "dur": 7.872, + "args": { + "External id": 460721,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451775194.777, "dur": 59773.644, + "args": { + "External id": 460722,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451775212.352, "dur": 59739.300, + "args": { + "External id": 460723,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451775376.623, "dur": 28.878, + "args": { + "External id": 460724,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451775430.633, "dur": 59463.402, + "args": { + "External id": 460725,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451775434.110, "dur": 59458.871, + "args": { + "External id": 460726,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451775442.744, "dur": 13.199, + "args": { + "External id": 460727,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451775458.480, "dur": 59430.437, + "args": { + "External id": 460728,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451835128.146, "dur": 13.944, + "args": { + "External id": 460729,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451835132.664, "dur": 8.695, + "args": { + "External id": 460730,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451835179.977, "dur": 372.706, + "args": { + "External id": 460731,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451835212.945, "dur": 333.630, + "args": { + "External id": 460732,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3515, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451835227.770, "dur": 312.443, + "args": { + "External id": 460733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451835576.184, "dur": 2.588, + "args": { + "External id": 460734,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3517, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451835676.620, "dur": 8.453, + "args": { + "External id": 460735,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451835746.678, "dur": 1.570, + "args": { + "External id": 460736,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451835768.617, "dur": 3.910, + "args": { + "External id": 460737,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451835787.546, "dur": 1.063, + "args": { + "External id": 460738,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451835802.351, "dur": 1.184, + "args": { + "External id": 460739,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451835816.495, "dur": 1.422, + "args": { + "External id": 460740,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451835830.531, "dur": 3.561, + "args": { + "External id": 460741,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451835847.856, "dur": 3.229, + "args": { + "External id": 460742,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451835864.775, "dur": 1.222, + "args": { + "External id": 460743,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451835979.916, "dur": 3691.561, + "args": { + "External id": 460744,"Record function id": 0, "Ev Idx": 3527 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2338707, "tid": 2379456, + "ts": 6267451836003.967, "dur": 1398.931, + "args": { + "External id": 460745,"Record function id": 0, "Ev Idx": 3528 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338707, "tid": 2379456, + "ts": 6267451836022.476, "dur": 468.985, + "args": { + "External id": 460746,"Record function id": 0, "Ev Idx": 3529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451836178.803, "dur": 6.575, + "args": { + "External id": 460747,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451836189.644, "dur": 1.377, + "args": { + "External id": 460748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451836193.033, "dur": 4.022, + "args": { + "External id": 460749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451836198.920, "dur": 1.249, + "args": { + "External id": 460750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451836202.073, "dur": 1.192, + "args": { + "External id": 460751,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451836205.084, "dur": 0.866, + "args": { + "External id": 460752,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451836207.611, "dur": 3.340, + "args": { + "External id": 460753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451836212.590, "dur": 1.208, + "args": { + "External id": 460754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451836215.386, "dur": 1.201, + "args": { + "External id": 460755,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451836218.321, "dur": 1.075, + "args": { + "External id": 460756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451836241.215, "dur": 209.624, + "args": { + "External id": 460757,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451836260.322, "dur": 184.805, + "args": { + "External id": 460758,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451836300.503, "dur": 19.649, + "args": { + "External id": 460759,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451836324.675, "dur": 82.889, + "args": { + "External id": 460760,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451836327.881, "dur": 79.337, + "args": { + "External id": 460761,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836332.879, "dur": 6.646, + "args": { + "External id": 460762,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451836341.860, "dur": 64.315, + "args": { + "External id": 460763,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2338707, "tid": 2379456, + "ts": 6267451836624.216, "dur": 768.862, + "args": { + "External id": 460764,"Record function id": 0, "Ev Idx": 3547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338707, "tid": 2379456, + "ts": 6267451836648.340, "dur": 728.412, + "args": { + "External id": 460765,"Record function id": 0, "Ev Idx": 3548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451836726.093, "dur": 7.614, + "args": { + "External id": 460766,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451836752.024, "dur": 46.439, + "args": { + "External id": 460767,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836758.296, "dur": 2.340, + "args": { + "External id": 460768,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836763.189, "dur": 0.634, + "args": { + "External id": 460769,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836765.920, "dur": 0.709, + "args": { + "External id": 460770,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836768.450, "dur": 0.636, + "args": { + "External id": 460771,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836771.028, "dur": 0.725, + "args": { + "External id": 460772,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836773.800, "dur": 3.161, + "args": { + "External id": 460773,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836779.008, "dur": 0.527, + "args": { + "External id": 460774,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836787.909, "dur": 0.534, + "args": { + "External id": 460775,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836790.297, "dur": 0.449, + "args": { + "External id": 460776,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451836814.809, "dur": 54.980, + "args": { + "External id": 460777,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451836907.614, "dur": 134.527, + "args": { + "External id": 460778,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451836926.615, "dur": 3.839, + "args": { + "External id": 460779,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451836937.416, "dur": 13.077, + "args": { + "External id": 460780,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451836942.953, "dur": 6.990, + "args": { + "External id": 460781,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836947.620, "dur": 0.716, + "args": { + "External id": 460782,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451836958.391, "dur": 28.785, + "args": { + "External id": 460783,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836960.845, "dur": 0.546, + "args": { + "External id": 460784,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836963.162, "dur": 0.765, + "args": { + "External id": 460785,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836965.680, "dur": 2.887, + "args": { + "External id": 460786,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836970.257, "dur": 0.514, + "args": { + "External id": 460787,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836972.644, "dur": 0.497, + "args": { + "External id": 460788,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836974.781, "dur": 0.488, + "args": { + "External id": 460789,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836977.011, "dur": 0.563, + "args": { + "External id": 460790,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836979.162, "dur": 0.459, + "args": { + "External id": 460791,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451836981.392, "dur": 0.576, + "args": { + "External id": 460792,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451836999.339, "dur": 33.788, + "args": { + "External id": 460793,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451837146.364, "dur": 142.804, + "args": { + "External id": 460794,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451837173.638, "dur": 110.685, + "args": { + "External id": 460795,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3578, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451837185.611, "dur": 93.600, + "args": { + "External id": 460796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451837307.220, "dur": 2.391, + "args": { + "External id": 460797,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3580, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451837411.512, "dur": 2232.625, + "args": { + "External id": 460798,"Sequence number": 5036394, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3581 + } + }, + { + "ph": "f", "id": 196, "pid": 2338707, "tid": 2379456, "ts": 6267451837411.512, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451837549.403, "dur": 154.400, + "args": { + "External id": 460799,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451837755.132, "dur": 48.790, + "args": { + "External id": 460800,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451837863.377, "dur": 79.514, + "args": { + "External id": 460801,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451837959.296, "dur": 40.108, + "args": { + "External id": 460802,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451838008.404, "dur": 39.483, + "args": { + "External id": 460803,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451838101.502, "dur": 43.018, + "args": { + "External id": 460804,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451838155.713, "dur": 36.102, + "args": { + "External id": 460805,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451838225.657, "dur": 31.021, + "args": { + "External id": 460806,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451838277.957, "dur": 33.939, + "args": { + "External id": 460807,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451838338.762, "dur": 21.759, + "args": { + "External id": 460808,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451838377.990, "dur": 16.281, + "args": { + "External id": 460809,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451838404.801, "dur": 45.961, + "args": { + "External id": 460810,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451838455.678, "dur": 39.648, + "args": { + "External id": 460811,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451838532.570, "dur": 350.158, + "args": { + "External id": 460812,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451838649.237, "dur": 7.881, + "args": { + "External id": 460813,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451838659.970, "dur": 2.791, + "args": { + "External id": 460814,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451838664.285, "dur": 2.052, + "args": { + "External id": 460815,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451838667.704, "dur": 1.895, + "args": { + "External id": 460816,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451838723.123, "dur": 22.358, + "args": { + "External id": 460817,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451838740.617, "dur": 4.056, + "args": { + "External id": 460818,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451838748.672, "dur": 48.429, + "args": { + "External id": 460819,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451838756.927, "dur": 4.282, + "args": { + "External id": 460820,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451838799.538, "dur": 1.735, + "args": { + "External id": 460821,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451838800.541, "dur": 0.643, + "args": { + "External id": 460822,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451838802.553, "dur": 16.904, + "args": { + "External id": 460823,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451838805.276, "dur": 0.763, + "args": { + "External id": 460824,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451838933.808, "dur": 35.170, + "args": { + "External id": 460825,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451838987.127, "dur": 18.727, + "args": { + "External id": 460826,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451839019.601, "dur": 105.231, + "args": { + "External id": 460827,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451839140.321, "dur": 57.313, + "args": { + "External id": 460828,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451839213.357, "dur": 26.803, + "args": { + "External id": 460829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451839251.268, "dur": 37.506, + "args": { + "External id": 460830,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451839298.532, "dur": 32.281, + "args": { + "External id": 460831,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451839340.500, "dur": 36.947, + "args": { + "External id": 460832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451839404.229, "dur": 29.642, + "args": { + "External id": 460833,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451839452.059, "dur": 30.554, + "args": { + "External id": 460834,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451839500.064, "dur": 20.137, + "args": { + "External id": 460835,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451839539.789, "dur": 17.480, + "args": { + "External id": 460836,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451839571.718, "dur": 18.057, + "args": { + "External id": 460837,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839699.233, "dur": 18.504, + "args": { + "External id": 460838,"Record function id": 0, "Ev Idx": 3621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839703.174, "dur": 13.312, + "args": { + "External id": 460839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839708.004, "dur": 7.086, + "args": { + "External id": 460840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839709.759, "dur": 5.218, + "args": { + "External id": 460841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839722.681, "dur": 5.701, + "args": { + "External id": 460842,"Record function id": 0, "Ev Idx": 3625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839724.690, "dur": 3.118, + "args": { + "External id": 460843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839725.561, "dur": 1.643, + "args": { + "External id": 460844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839726.150, "dur": 0.897, + "args": { + "External id": 460845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839732.473, "dur": 8.149, + "args": { + "External id": 460846,"Record function id": 0, "Ev Idx": 3629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839734.283, "dur": 5.841, + "args": { + "External id": 460847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839734.909, "dur": 4.740, + "args": { + "External id": 460848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839735.715, "dur": 3.842, + "args": { + "External id": 460849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839744.626, "dur": 6.110, + "args": { + "External id": 460850,"Record function id": 0, "Ev Idx": 3633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839746.396, "dur": 3.746, + "args": { + "External id": 460851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839747.142, "dur": 2.478, + "args": { + "External id": 460852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839748.465, "dur": 1.021, + "args": { + "External id": 460853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839754.970, "dur": 4.534, + "args": { + "External id": 460854,"Record function id": 0, "Ev Idx": 3637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839756.568, "dur": 2.303, + "args": { + "External id": 460855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839757.121, "dur": 1.259, + "args": { + "External id": 460856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839757.485, "dur": 0.817, + "args": { + "External id": 460857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839763.605, "dur": 4.779, + "args": { + "External id": 460858,"Record function id": 0, "Ev Idx": 3641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839765.165, "dur": 2.666, + "args": { + "External id": 460859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839765.875, "dur": 1.438, + "args": { + "External id": 460860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839766.229, "dur": 0.954, + "args": { + "External id": 460861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839772.230, "dur": 5.791, + "args": { + "External id": 460862,"Record function id": 0, "Ev Idx": 3645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839773.743, "dur": 3.744, + "args": { + "External id": 460863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839774.627, "dur": 2.382, + "args": { + "External id": 460864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839775.885, "dur": 0.997, + "args": { + "External id": 460865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839782.015, "dur": 5.034, + "args": { + "External id": 460866,"Record function id": 0, "Ev Idx": 3649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839783.492, "dur": 3.028, + "args": { + "External id": 460867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839784.096, "dur": 1.787, + "args": { + "External id": 460868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839784.606, "dur": 0.953, + "args": { + "External id": 460869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839790.957, "dur": 4.695, + "args": { + "External id": 460870,"Record function id": 0, "Ev Idx": 3653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451839792.582, "dur": 2.542, + "args": { + "External id": 460871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839793.397, "dur": 1.176, + "args": { + "External id": 460872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451839793.710, "dur": 0.766, + "args": { + "External id": 460873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451839800.579, "dur": 72547.861, + "args": { + "External id": 460874,"Record function id": 0, "Sequence number": 5036393, "Fwd thread id": 1, "Ev Idx": 3657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451839802.106, "dur": 72534.830, + "args": { + "External id": 460875,"Sequence number": 5036393, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3658 + } + }, + { + "ph": "f", "id": 197, "pid": 2338707, "tid": 2379456, "ts": 6267451839802.106, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2338707, "tid": 2379456, + "ts": 6267451839837.810, "dur": 47.390, + "args": { + "External id": 460876,"Record function id": 0, "Ev Idx": 3659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2338707, "tid": 2379456, + "ts": 6267451839894.782, "dur": 84.522, + "args": { + "External id": 460877,"Record function id": 0, "Ev Idx": 3660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2338707, "tid": 2379456, + "ts": 6267451839987.008, "dur": 72340.372, + "args": { + "External id": 460878,"Record function id": 0, "Ev Idx": 3661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451840158.342, "dur": 10.934, + "args": { + "External id": 460879,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451840183.537, "dur": 8.704, + "args": { + "External id": 460880,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451840212.183, "dur": 70998.020, + "args": { + "External id": 460881,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451840229.904, "dur": 70964.260, + "args": { + "External id": 460882,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451840348.230, "dur": 21.447, + "args": { + "External id": 460883,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451840394.379, "dur": 70743.398, + "args": { + "External id": 460884,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451840398.351, "dur": 70738.157, + "args": { + "External id": 460885,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451840403.877, "dur": 11.950, + "args": { + "External id": 460886,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451840417.932, "dur": 70712.592, + "args": { + "External id": 460887,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451911340.163, "dur": 13.972, + "args": { + "External id": 460888,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451911344.287, "dur": 9.466, + "args": { + "External id": 460889,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451911390.909, "dur": 521.953, + "args": { + "External id": 460890,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451911430.473, "dur": 474.720, + "args": { + "External id": 460891,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3674, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451911443.993, "dur": 452.526, + "args": { + "External id": 460892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451911944.114, "dur": 3.036, + "args": { + "External id": 460893,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3676, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451912024.857, "dur": 8.177, + "args": { + "External id": 460894,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451912131.039, "dur": 2.567, + "args": { + "External id": 460895,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451912153.431, "dur": 4.293, + "args": { + "External id": 460896,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451912171.297, "dur": 0.916, + "args": { + "External id": 460897,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451912184.879, "dur": 0.853, + "args": { + "External id": 460898,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451912198.755, "dur": 1.128, + "args": { + "External id": 460899,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451912212.755, "dur": 6.663, + "args": { + "External id": 460900,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451912234.364, "dur": 2.744, + "args": { + "External id": 460901,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451912248.268, "dur": 0.756, + "args": { + "External id": 460902,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451912367.823, "dur": 3615.441, + "args": { + "External id": 460903,"Record function id": 0, "Ev Idx": 3686 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2338707, "tid": 2379456, + "ts": 6267451912393.572, "dur": 1364.908, + "args": { + "External id": 460904,"Record function id": 0, "Ev Idx": 3687 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338707, "tid": 2379456, + "ts": 6267451912411.309, "dur": 441.878, + "args": { + "External id": 460905,"Record function id": 0, "Ev Idx": 3688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451912521.561, "dur": 5.489, + "args": { + "External id": 460906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451912531.005, "dur": 1.086, + "args": { + "External id": 460907,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451912534.348, "dur": 3.741, + "args": { + "External id": 460908,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451912540.116, "dur": 1.067, + "args": { + "External id": 460909,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451912542.989, "dur": 1.199, + "args": { + "External id": 460910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451912548.011, "dur": 0.779, + "args": { + "External id": 460911,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451912550.846, "dur": 2.158, + "args": { + "External id": 460912,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451912554.661, "dur": 0.940, + "args": { + "External id": 460913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451912557.444, "dur": 0.840, + "args": { + "External id": 460914,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451912561.978, "dur": 0.832, + "args": { + "External id": 460915,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451912583.860, "dur": 229.348, + "args": { + "External id": 460916,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451912631.585, "dur": 175.131, + "args": { + "External id": 460917,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451912663.595, "dur": 19.915, + "args": { + "External id": 460918,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451912688.351, "dur": 82.922, + "args": { + "External id": 460919,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451912691.123, "dur": 79.723, + "args": { + "External id": 460920,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451912696.005, "dur": 7.345, + "args": { + "External id": 460921,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451912705.484, "dur": 64.175, + "args": { + "External id": 460922,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2338707, "tid": 2379456, + "ts": 6267451912965.033, "dur": 784.720, + "args": { + "External id": 460923,"Record function id": 0, "Ev Idx": 3706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338707, "tid": 2379456, + "ts": 6267451912985.870, "dur": 749.400, + "args": { + "External id": 460924,"Record function id": 0, "Ev Idx": 3707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451913105.679, "dur": 8.887, + "args": { + "External id": 460925,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451913133.071, "dur": 39.033, + "args": { + "External id": 460926,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913139.813, "dur": 2.366, + "args": { + "External id": 460927,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913144.918, "dur": 2.413, + "args": { + "External id": 460928,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913149.118, "dur": 0.453, + "args": { + "External id": 460929,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913151.183, "dur": 0.595, + "args": { + "External id": 460930,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913154.865, "dur": 0.440, + "args": { + "External id": 460931,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913156.818, "dur": 2.802, + "args": { + "External id": 460932,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913161.171, "dur": 0.280, + "args": { + "External id": 460933,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913163.723, "dur": 0.435, + "args": { + "External id": 460934,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913165.923, "dur": 0.415, + "args": { + "External id": 460935,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451913185.766, "dur": 55.220, + "args": { + "External id": 460936,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451913287.571, "dur": 137.870, + "args": { + "External id": 460937,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451913299.998, "dur": 4.521, + "args": { + "External id": 460938,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451913310.430, "dur": 18.328, + "args": { + "External id": 460939,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451913318.087, "dur": 6.783, + "args": { + "External id": 460940,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913322.186, "dur": 0.904, + "args": { + "External id": 460941,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451913337.572, "dur": 28.276, + "args": { + "External id": 460942,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913340.396, "dur": 0.510, + "args": { + "External id": 460943,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913343.098, "dur": 0.832, + "args": { + "External id": 460944,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913345.782, "dur": 2.743, + "args": { + "External id": 460945,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913350.292, "dur": 0.443, + "args": { + "External id": 460946,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913352.307, "dur": 0.398, + "args": { + "External id": 460947,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913354.538, "dur": 0.374, + "args": { + "External id": 460948,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913356.434, "dur": 0.425, + "args": { + "External id": 460949,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913358.539, "dur": 0.401, + "args": { + "External id": 460950,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451913360.592, "dur": 0.492, + "args": { + "External id": 460951,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451913380.488, "dur": 35.518, + "args": { + "External id": 460952,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451913481.686, "dur": 164.167, + "args": { + "External id": 460953,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451913517.071, "dur": 123.834, + "args": { + "External id": 460954,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3737, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451913528.839, "dur": 106.234, + "args": { + "External id": 460955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451913668.874, "dur": 2.357, + "args": { + "External id": 460956,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3739, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451913768.236, "dur": 2186.981, + "args": { + "External id": 460957,"Sequence number": 5036392, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3740 + } + }, + { + "ph": "f", "id": 198, "pid": 2338707, "tid": 2379456, "ts": 6267451913768.236, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451913906.876, "dur": 135.519, + "args": { + "External id": 460958,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451914140.713, "dur": 52.208, + "args": { + "External id": 460959,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451914215.803, "dur": 69.935, + "args": { + "External id": 460960,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451914300.237, "dur": 39.146, + "args": { + "External id": 460961,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451914347.129, "dur": 38.772, + "args": { + "External id": 460962,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451914394.476, "dur": 32.478, + "args": { + "External id": 460963,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451914436.467, "dur": 35.124, + "args": { + "External id": 460964,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451914504.208, "dur": 31.030, + "args": { + "External id": 460965,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451914560.358, "dur": 55.134, + "args": { + "External id": 460966,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451914647.241, "dur": 24.271, + "args": { + "External id": 460967,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451914688.232, "dur": 18.935, + "args": { + "External id": 460968,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451914718.984, "dur": 51.148, + "args": { + "External id": 460969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451914774.406, "dur": 60.369, + "args": { + "External id": 460970,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451914892.118, "dur": 370.849, + "args": { + "External id": 460971,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451914994.850, "dur": 7.657, + "args": { + "External id": 460972,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451915004.874, "dur": 4.149, + "args": { + "External id": 460973,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451915010.683, "dur": 1.920, + "args": { + "External id": 460974,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451915013.913, "dur": 1.844, + "args": { + "External id": 460975,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451915123.134, "dur": 7.205, + "args": { + "External id": 460976,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451915125.858, "dur": 3.946, + "args": { + "External id": 460977,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451915133.153, "dur": 45.924, + "args": { + "External id": 460978,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451915143.371, "dur": 3.838, + "args": { + "External id": 460979,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451915181.210, "dur": 4.799, + "args": { + "External id": 460980,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451915185.275, "dur": 0.635, + "args": { + "External id": 460981,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451915187.410, "dur": 20.213, + "args": { + "External id": 460982,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451915189.965, "dur": 1.990, + "args": { + "External id": 460983,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451915312.034, "dur": 32.960, + "args": { + "External id": 460984,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451915365.576, "dur": 19.523, + "args": { + "External id": 460985,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451915394.045, "dur": 60.875, + "args": { + "External id": 460986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451915462.516, "dur": 48.719, + "args": { + "External id": 460987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451915523.600, "dur": 25.315, + "args": { + "External id": 460988,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451915555.062, "dur": 53.037, + "args": { + "External id": 460989,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451915622.316, "dur": 37.447, + "args": { + "External id": 460990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451915667.364, "dur": 35.252, + "args": { + "External id": 460991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451915728.645, "dur": 28.591, + "args": { + "External id": 460992,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451915780.860, "dur": 28.282, + "args": { + "External id": 460993,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451915826.219, "dur": 19.429, + "args": { + "External id": 460994,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451915868.485, "dur": 16.621, + "args": { + "External id": 460995,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451915901.211, "dur": 19.275, + "args": { + "External id": 460996,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916009.993, "dur": 17.547, + "args": { + "External id": 460997,"Record function id": 0, "Ev Idx": 3780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916014.171, "dur": 12.290, + "args": { + "External id": 460998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916018.773, "dur": 6.614, + "args": { + "External id": 460999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916020.579, "dur": 4.665, + "args": { + "External id": 461000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916032.206, "dur": 5.640, + "args": { + "External id": 461001,"Record function id": 0, "Ev Idx": 3784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916033.856, "dur": 3.490, + "args": { + "External id": 461002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916034.893, "dur": 1.733, + "args": { + "External id": 461003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916035.505, "dur": 0.981, + "args": { + "External id": 461004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916042.025, "dur": 7.725, + "args": { + "External id": 461005,"Record function id": 0, "Ev Idx": 3788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916043.491, "dur": 5.782, + "args": { + "External id": 461006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916044.132, "dur": 4.418, + "args": { + "External id": 461007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916045.051, "dur": 3.405, + "args": { + "External id": 461008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916053.583, "dur": 46.802, + "args": { + "External id": 461009,"Record function id": 0, "Ev Idx": 3792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916055.240, "dur": 43.677, + "args": { + "External id": 461010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916094.452, "dur": 3.259, + "args": { + "External id": 461011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916095.501, "dur": 1.897, + "args": { + "External id": 461012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916106.820, "dur": 5.850, + "args": { + "External id": 461013,"Record function id": 0, "Ev Idx": 3796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916108.674, "dur": 3.478, + "args": { + "External id": 461014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916110.213, "dur": 1.289, + "args": { + "External id": 461015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916110.604, "dur": 0.811, + "args": { + "External id": 461016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916116.799, "dur": 7.194, + "args": { + "External id": 461017,"Record function id": 0, "Ev Idx": 3800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916118.363, "dur": 5.100, + "args": { + "External id": 461018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916118.986, "dur": 3.823, + "args": { + "External id": 461019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916121.940, "dur": 0.791, + "args": { + "External id": 461020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916127.919, "dur": 4.174, + "args": { + "External id": 461021,"Record function id": 0, "Ev Idx": 3804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916129.281, "dur": 2.328, + "args": { + "External id": 461022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916129.863, "dur": 1.234, + "args": { + "External id": 461023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916130.266, "dur": 0.722, + "args": { + "External id": 461024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916136.022, "dur": 4.858, + "args": { + "External id": 461025,"Record function id": 0, "Ev Idx": 3808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916137.872, "dur": 2.489, + "args": { + "External id": 461026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916138.546, "dur": 1.302, + "args": { + "External id": 461027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916139.069, "dur": 0.699, + "args": { + "External id": 461028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916144.650, "dur": 5.126, + "args": { + "External id": 461029,"Record function id": 0, "Ev Idx": 3812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451916146.384, "dur": 2.866, + "args": { + "External id": 461030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916147.133, "dur": 1.620, + "args": { + "External id": 461031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451916147.790, "dur": 0.883, + "args": { + "External id": 461032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451916155.307, "dur": 72222.811, + "args": { + "External id": 461033,"Record function id": 0, "Sequence number": 5036391, "Fwd thread id": 1, "Ev Idx": 3816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451916157.442, "dur": 72209.425, + "args": { + "External id": 461034,"Sequence number": 5036391, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3817 + } + }, + { + "ph": "f", "id": 199, "pid": 2338707, "tid": 2379456, "ts": 6267451916157.442, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2338707, "tid": 2379456, + "ts": 6267451916196.375, "dur": 48.761, + "args": { + "External id": 461035,"Record function id": 0, "Ev Idx": 3818 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2338707, "tid": 2379456, + "ts": 6267451916255.093, "dur": 79.847, + "args": { + "External id": 461036,"Record function id": 0, "Ev Idx": 3819 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2338707, "tid": 2379456, + "ts": 6267451916345.739, "dur": 72010.742, + "args": { + "External id": 461037,"Record function id": 0, "Ev Idx": 3820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451916461.677, "dur": 9.370, + "args": { + "External id": 461038,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451916485.202, "dur": 7.757, + "args": { + "External id": 461039,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451916509.997, "dur": 70790.111, + "args": { + "External id": 461040,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451916527.689, "dur": 70756.334, + "args": { + "External id": 461041,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451916683.506, "dur": 31.197, + "args": { + "External id": 461042,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451916740.398, "dur": 70494.994, + "args": { + "External id": 461043,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451916746.803, "dur": 70487.472, + "args": { + "External id": 461044,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451916752.263, "dur": 14.762, + "args": { + "External id": 461045,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451916769.893, "dur": 70460.516, + "args": { + "External id": 461046,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451987429.340, "dur": 12.443, + "args": { + "External id": 461047,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451987433.319, "dur": 7.975, + "args": { + "External id": 461048,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451987479.197, "dur": 455.981, + "args": { + "External id": 461049,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451987518.805, "dur": 409.860, + "args": { + "External id": 461050,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3833, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451987534.264, "dur": 387.281, + "args": { + "External id": 461051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451987965.668, "dur": 3.161, + "args": { + "External id": 461052,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3835, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451988045.875, "dur": 46.960, + "args": { + "External id": 461053,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451988154.185, "dur": 2.220, + "args": { + "External id": 461054,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451988174.934, "dur": 4.445, + "args": { + "External id": 461055,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451988194.131, "dur": 1.028, + "args": { + "External id": 461056,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451988208.980, "dur": 1.019, + "args": { + "External id": 461057,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451988223.501, "dur": 0.978, + "args": { + "External id": 461058,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451988239.939, "dur": 3.528, + "args": { + "External id": 461059,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451988256.669, "dur": 2.460, + "args": { + "External id": 461060,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451988271.482, "dur": 0.737, + "args": { + "External id": 461061,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451988399.149, "dur": 3629.200, + "args": { + "External id": 461062,"Record function id": 0, "Ev Idx": 3845 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2338707, "tid": 2379456, + "ts": 6267451988424.496, "dur": 1375.068, + "args": { + "External id": 461063,"Record function id": 0, "Ev Idx": 3846 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338707, "tid": 2379456, + "ts": 6267451988444.246, "dur": 455.449, + "args": { + "External id": 461064,"Record function id": 0, "Ev Idx": 3847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451988555.180, "dur": 5.228, + "args": { + "External id": 461065,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451988564.402, "dur": 1.231, + "args": { + "External id": 461066,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451988567.928, "dur": 4.039, + "args": { + "External id": 461067,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451988574.559, "dur": 0.948, + "args": { + "External id": 461068,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451988577.250, "dur": 1.016, + "args": { + "External id": 461069,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451988582.367, "dur": 0.944, + "args": { + "External id": 461070,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451988585.303, "dur": 2.383, + "args": { + "External id": 461071,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451988589.400, "dur": 0.933, + "args": { + "External id": 461072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451988609.590, "dur": 1.952, + "args": { + "External id": 461073,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451988617.765, "dur": 0.653, + "args": { + "External id": 461074,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451988640.944, "dur": 219.296, + "args": { + "External id": 461075,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451988661.242, "dur": 192.002, + "args": { + "External id": 461076,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451988697.257, "dur": 20.243, + "args": { + "External id": 461077,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451988721.637, "dur": 88.946, + "args": { + "External id": 461078,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451988724.653, "dur": 85.593, + "args": { + "External id": 461079,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451988729.507, "dur": 7.815, + "args": { + "External id": 461080,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451988739.328, "dur": 69.911, + "args": { + "External id": 461081,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2338707, "tid": 2379456, + "ts": 6267451989010.240, "dur": 779.263, + "args": { + "External id": 461082,"Record function id": 0, "Ev Idx": 3865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338707, "tid": 2379456, + "ts": 6267451989030.345, "dur": 743.554, + "args": { + "External id": 461083,"Record function id": 0, "Ev Idx": 3866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451989152.651, "dur": 8.365, + "args": { + "External id": 461084,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451989181.008, "dur": 41.554, + "args": { + "External id": 461085,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989187.538, "dur": 2.184, + "args": { + "External id": 461086,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989192.388, "dur": 2.059, + "args": { + "External id": 461087,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989196.390, "dur": 0.401, + "args": { + "External id": 461088,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989198.644, "dur": 0.572, + "args": { + "External id": 461089,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989203.239, "dur": 0.769, + "args": { + "External id": 461090,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989205.572, "dur": 2.958, + "args": { + "External id": 461091,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989210.171, "dur": 0.603, + "args": { + "External id": 461092,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989213.275, "dur": 0.387, + "args": { + "External id": 461093,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989215.629, "dur": 0.700, + "args": { + "External id": 461094,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451989235.629, "dur": 52.083, + "args": { + "External id": 461095,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267451989329.084, "dur": 133.156, + "args": { + "External id": 461096,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451989341.485, "dur": 4.360, + "args": { + "External id": 461097,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267451989352.249, "dur": 11.880, + "args": { + "External id": 461098,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267451989357.217, "dur": 6.376, + "args": { + "External id": 461099,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989361.385, "dur": 0.771, + "args": { + "External id": 461100,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267451989372.460, "dur": 34.869, + "args": { + "External id": 461101,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989375.469, "dur": 0.657, + "args": { + "External id": 461102,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989378.849, "dur": 0.379, + "args": { + "External id": 461103,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989385.817, "dur": 2.648, + "args": { + "External id": 461104,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989390.489, "dur": 1.430, + "args": { + "External id": 461105,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989393.449, "dur": 0.535, + "args": { + "External id": 461106,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989395.418, "dur": 0.398, + "args": { + "External id": 461107,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989398.486, "dur": 0.371, + "args": { + "External id": 461108,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989400.346, "dur": 0.413, + "args": { + "External id": 461109,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451989402.380, "dur": 0.496, + "args": { + "External id": 461110,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451989420.268, "dur": 32.726, + "args": { + "External id": 461111,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267451989516.212, "dur": 169.098, + "args": { + "External id": 461112,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451989554.522, "dur": 125.472, + "args": { + "External id": 461113,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3896, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267451989565.776, "dur": 108.635, + "args": { + "External id": 461114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267451989707.774, "dur": 2.690, + "args": { + "External id": 461115,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3898, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451989809.562, "dur": 2192.652, + "args": { + "External id": 461116,"Sequence number": 5036390, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3899 + } + }, + { + "ph": "f", "id": 200, "pid": 2338707, "tid": 2379456, "ts": 6267451989809.562, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451989945.241, "dur": 177.485, + "args": { + "External id": 461117,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451990180.733, "dur": 51.649, + "args": { + "External id": 461118,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267451990255.046, "dur": 68.885, + "args": { + "External id": 461119,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451990338.360, "dur": 38.579, + "args": { + "External id": 461120,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451990384.978, "dur": 42.247, + "args": { + "External id": 461121,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451990435.263, "dur": 34.027, + "args": { + "External id": 461122,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451990477.550, "dur": 33.929, + "args": { + "External id": 461123,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451990547.113, "dur": 29.657, + "args": { + "External id": 461124,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267451990618.259, "dur": 38.549, + "args": { + "External id": 461125,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451990688.405, "dur": 24.569, + "args": { + "External id": 461126,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451990730.254, "dur": 19.374, + "args": { + "External id": 461127,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451990759.573, "dur": 51.021, + "args": { + "External id": 461128,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451990815.173, "dur": 39.490, + "args": { + "External id": 461129,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267451990892.933, "dur": 390.223, + "args": { + "External id": 461130,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451991013.329, "dur": 9.198, + "args": { + "External id": 461131,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451991025.498, "dur": 3.341, + "args": { + "External id": 461132,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451991030.464, "dur": 1.943, + "args": { + "External id": 461133,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451991033.848, "dur": 1.973, + "args": { + "External id": 461134,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451991147.456, "dur": 11.083, + "args": { + "External id": 461135,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451991152.748, "dur": 5.313, + "args": { + "External id": 461136,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451991160.975, "dur": 42.372, + "args": { + "External id": 461137,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451991167.935, "dur": 4.152, + "args": { + "External id": 461138,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267451991205.109, "dur": 2.098, + "args": { + "External id": 461139,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451991206.447, "dur": 0.667, + "args": { + "External id": 461140,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267451991208.567, "dur": 20.334, + "args": { + "External id": 461141,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451991212.594, "dur": 0.705, + "args": { + "External id": 461142,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267451991332.741, "dur": 34.482, + "args": { + "External id": 461143,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451991391.067, "dur": 21.541, + "args": { + "External id": 461144,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451991421.913, "dur": 63.685, + "args": { + "External id": 461145,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451991493.348, "dur": 50.309, + "args": { + "External id": 461146,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451991556.942, "dur": 25.240, + "args": { + "External id": 461147,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451991588.722, "dur": 57.807, + "args": { + "External id": 461148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451991659.332, "dur": 34.314, + "args": { + "External id": 461149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267451991702.135, "dur": 35.651, + "args": { + "External id": 461150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267451991765.542, "dur": 29.258, + "args": { + "External id": 461151,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451991814.008, "dur": 29.774, + "args": { + "External id": 461152,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267451991865.311, "dur": 20.344, + "args": { + "External id": 461153,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267451991908.558, "dur": 19.715, + "args": { + "External id": 461154,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267451991945.488, "dur": 18.148, + "args": { + "External id": 461155,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992093.701, "dur": 25.240, + "args": { + "External id": 461156,"Record function id": 0, "Ev Idx": 3939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992100.729, "dur": 16.493, + "args": { + "External id": 461157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992105.998, "dur": 9.469, + "args": { + "External id": 461158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992107.730, "dur": 7.389, + "args": { + "External id": 461159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992126.522, "dur": 6.172, + "args": { + "External id": 461160,"Record function id": 0, "Ev Idx": 3943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992128.703, "dur": 3.348, + "args": { + "External id": 461161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992129.724, "dur": 1.494, + "args": { + "External id": 461162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992130.197, "dur": 0.917, + "args": { + "External id": 461163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992137.279, "dur": 8.098, + "args": { + "External id": 461164,"Record function id": 0, "Ev Idx": 3947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992138.937, "dur": 5.859, + "args": { + "External id": 461165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992139.837, "dur": 4.454, + "args": { + "External id": 461166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992140.559, "dur": 3.568, + "args": { + "External id": 461167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992149.376, "dur": 5.373, + "args": { + "External id": 461168,"Record function id": 0, "Ev Idx": 3951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992151.249, "dur": 2.969, + "args": { + "External id": 461169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992152.251, "dur": 1.423, + "args": { + "External id": 461170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992152.578, "dur": 0.999, + "args": { + "External id": 461171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992158.655, "dur": 6.155, + "args": { + "External id": 461172,"Record function id": 0, "Ev Idx": 3955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992160.638, "dur": 3.582, + "args": { + "External id": 461173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992161.661, "dur": 1.775, + "args": { + "External id": 461174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992162.497, "dur": 0.855, + "args": { + "External id": 461175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992168.674, "dur": 4.909, + "args": { + "External id": 461176,"Record function id": 0, "Ev Idx": 3959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992170.400, "dur": 2.674, + "args": { + "External id": 461177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992171.303, "dur": 1.241, + "args": { + "External id": 461178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992171.727, "dur": 0.735, + "args": { + "External id": 461179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992177.694, "dur": 18.741, + "args": { + "External id": 461180,"Record function id": 0, "Ev Idx": 3963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992179.297, "dur": 16.537, + "args": { + "External id": 461181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992180.399, "dur": 14.802, + "args": { + "External id": 461182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992194.298, "dur": 0.776, + "args": { + "External id": 461183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992204.296, "dur": 5.013, + "args": { + "External id": 461184,"Record function id": 0, "Ev Idx": 3967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992205.901, "dur": 2.911, + "args": { + "External id": 461185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992206.739, "dur": 1.502, + "args": { + "External id": 461186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992207.492, "dur": 0.652, + "args": { + "External id": 461187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992213.255, "dur": 5.550, + "args": { + "External id": 461188,"Record function id": 0, "Ev Idx": 3971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267451992215.397, "dur": 2.916, + "args": { + "External id": 461189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992216.176, "dur": 1.644, + "args": { + "External id": 461190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267451992216.817, "dur": 0.920, + "args": { + "External id": 461191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451992224.424, "dur": 76769.885, + "args": { + "External id": 461192,"Record function id": 0, "Sequence number": 5036389, "Fwd thread id": 1, "Ev Idx": 3975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267451992226.907, "dur": 76756.350, + "args": { + "External id": 461193,"Sequence number": 5036389, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3976 + } + }, + { + "ph": "f", "id": 201, "pid": 2338707, "tid": 2379456, "ts": 6267451992226.907, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2338707, "tid": 2379456, + "ts": 6267451992269.789, "dur": 48.901, + "args": { + "External id": 461194,"Record function id": 0, "Ev Idx": 3977 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2338707, "tid": 2379456, + "ts": 6267451992328.632, "dur": 80.472, + "args": { + "External id": 461195,"Record function id": 0, "Ev Idx": 3978 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2338707, "tid": 2379456, + "ts": 6267451992416.729, "dur": 76557.452, + "args": { + "External id": 461196,"Record function id": 0, "Ev Idx": 3979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451992528.077, "dur": 9.304, + "args": { + "External id": 461197,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267451992548.818, "dur": 9.283, + "args": { + "External id": 461198,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451992577.347, "dur": 75293.745, + "args": { + "External id": 461199,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267451992612.010, "dur": 75242.728, + "args": { + "External id": 461200,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267451992739.630, "dur": 30.862, + "args": { + "External id": 461201,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267451992796.337, "dur": 75005.093, + "args": { + "External id": 461202,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267451992799.682, "dur": 75000.402, + "args": { + "External id": 461203,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267451992805.198, "dur": 16.096, + "args": { + "External id": 461204,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267451992823.895, "dur": 74970.576, + "args": { + "External id": 461205,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452067997.689, "dur": 13.776, + "args": { + "External id": 461206,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452068001.959, "dur": 8.986, + "args": { + "External id": 461207,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452068048.615, "dur": 534.022, + "args": { + "External id": 461208,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452068119.669, "dur": 456.494, + "args": { + "External id": 461209,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3992, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452068137.249, "dur": 432.313, + "args": { + "External id": 461210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452068631.353, "dur": 4.000, + "args": { + "External id": 461211,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3994, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452068718.519, "dur": 7.703, + "args": { + "External id": 461212,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452068781.036, "dur": 1.608, + "args": { + "External id": 461213,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452068801.339, "dur": 3.924, + "args": { + "External id": 461214,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452068818.533, "dur": 0.843, + "args": { + "External id": 461215,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452068832.047, "dur": 1.022, + "args": { + "External id": 461216,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452068846.681, "dur": 0.957, + "args": { + "External id": 461217,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452068860.207, "dur": 3.444, + "args": { + "External id": 461218,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452068875.882, "dur": 2.397, + "args": { + "External id": 461219,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452068892.937, "dur": 0.956, + "args": { + "External id": 461220,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452069016.852, "dur": 3703.894, + "args": { + "External id": 461221,"Record function id": 0, "Ev Idx": 4004 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2338707, "tid": 2379456, + "ts": 6267452069041.128, "dur": 1411.629, + "args": { + "External id": 461222,"Record function id": 0, "Ev Idx": 4005 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338707, "tid": 2379456, + "ts": 6267452069101.656, "dur": 427.615, + "args": { + "External id": 461223,"Record function id": 0, "Ev Idx": 4006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452069215.011, "dur": 6.626, + "args": { + "External id": 461224,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452069225.764, "dur": 0.835, + "args": { + "External id": 461225,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452069228.844, "dur": 3.546, + "args": { + "External id": 461226,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452069234.910, "dur": 0.744, + "args": { + "External id": 461227,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452069237.349, "dur": 0.905, + "args": { + "External id": 461228,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452069239.906, "dur": 0.799, + "args": { + "External id": 461229,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452069242.292, "dur": 2.274, + "args": { + "External id": 461230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452069249.083, "dur": 0.680, + "args": { + "External id": 461231,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452069251.833, "dur": 0.943, + "args": { + "External id": 461232,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452069254.569, "dur": 0.580, + "args": { + "External id": 461233,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452069276.890, "dur": 214.964, + "args": { + "External id": 461234,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452069296.853, "dur": 188.785, + "args": { + "External id": 461235,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452069334.784, "dur": 22.641, + "args": { + "External id": 461236,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452069361.590, "dur": 88.582, + "args": { + "External id": 461237,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452069364.787, "dur": 84.893, + "args": { + "External id": 461238,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069370.091, "dur": 7.196, + "args": { + "External id": 461239,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452069379.721, "dur": 69.257, + "args": { + "External id": 461240,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4023 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2338707, "tid": 2379456, + "ts": 6267452069664.656, "dur": 778.490, + "args": { + "External id": 461241,"Record function id": 0, "Ev Idx": 4024 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338707, "tid": 2379456, + "ts": 6267452069686.589, "dur": 742.309, + "args": { + "External id": 461242,"Record function id": 0, "Ev Idx": 4025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452069767.832, "dur": 7.370, + "args": { + "External id": 461243,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452069794.405, "dur": 41.651, + "args": { + "External id": 461244,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069800.797, "dur": 3.479, + "args": { + "External id": 461245,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069806.385, "dur": 0.571, + "args": { + "External id": 461246,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069808.963, "dur": 0.359, + "args": { + "External id": 461247,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069813.125, "dur": 0.327, + "args": { + "External id": 461248,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069815.299, "dur": 0.821, + "args": { + "External id": 461249,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069817.959, "dur": 2.873, + "args": { + "External id": 461250,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069823.539, "dur": 0.584, + "args": { + "External id": 461251,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069825.818, "dur": 0.590, + "args": { + "External id": 461252,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069828.087, "dur": 1.931, + "args": { + "External id": 461253,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452069849.237, "dur": 52.005, + "args": { + "External id": 461254,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267452069938.363, "dur": 191.055, + "args": { + "External id": 461255,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452069950.383, "dur": 4.435, + "args": { + "External id": 461256,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267452069961.269, "dur": 11.683, + "args": { + "External id": 461257,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267452069966.083, "dur": 6.346, + "args": { + "External id": 461258,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069970.332, "dur": 0.766, + "args": { + "External id": 461259,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452069981.011, "dur": 32.596, + "args": { + "External id": 461260,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069983.746, "dur": 0.576, + "args": { + "External id": 461261,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069986.301, "dur": 0.366, + "args": { + "External id": 461262,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069988.351, "dur": 4.381, + "args": { + "External id": 461263,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069994.903, "dur": 0.516, + "args": { + "External id": 461264,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452069996.832, "dur": 0.419, + "args": { + "External id": 461265,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452070000.727, "dur": 0.409, + "args": { + "External id": 461266,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452070002.771, "dur": 0.423, + "args": { + "External id": 461267,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452070005.130, "dur": 0.311, + "args": { + "External id": 461268,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452070007.791, "dur": 0.455, + "args": { + "External id": 461269,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452070036.806, "dur": 80.525, + "args": { + "External id": 461270,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452070190.823, "dur": 151.297, + "args": { + "External id": 461271,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452070230.573, "dur": 106.839, + "args": { + "External id": 461272,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4055, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452070243.018, "dur": 89.240, + "args": { + "External id": 461273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452070362.751, "dur": 1.965, + "args": { + "External id": 461274,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4057, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452070462.729, "dur": 2229.891, + "args": { + "External id": 461275,"Sequence number": 5036388, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4058 + } + }, + { + "ph": "f", "id": 202, "pid": 2338707, "tid": 2379456, "ts": 6267452070462.729, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452070621.194, "dur": 132.735, + "args": { + "External id": 461276,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452070807.548, "dur": 49.333, + "args": { + "External id": 461277,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267452070878.622, "dur": 60.682, + "args": { + "External id": 461278,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452070953.449, "dur": 37.939, + "args": { + "External id": 461279,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452070999.023, "dur": 40.687, + "args": { + "External id": 461280,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452071047.156, "dur": 79.956, + "args": { + "External id": 461281,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452071142.079, "dur": 38.710, + "args": { + "External id": 461282,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452071216.437, "dur": 32.112, + "args": { + "External id": 461283,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452071272.339, "dur": 38.619, + "args": { + "External id": 461284,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452071338.303, "dur": 27.164, + "args": { + "External id": 461285,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452071380.265, "dur": 17.132, + "args": { + "External id": 461286,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452071407.505, "dur": 46.265, + "args": { + "External id": 461287,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452071457.890, "dur": 40.569, + "args": { + "External id": 461288,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267452071556.451, "dur": 335.173, + "args": { + "External id": 461289,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452071677.650, "dur": 11.843, + "args": { + "External id": 461290,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452071692.871, "dur": 4.768, + "args": { + "External id": 461291,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452071699.653, "dur": 2.091, + "args": { + "External id": 461292,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452071703.312, "dur": 2.060, + "args": { + "External id": 461293,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452071762.279, "dur": 8.644, + "args": { + "External id": 461294,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452071767.309, "dur": 3.355, + "args": { + "External id": 461295,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452071773.325, "dur": 42.161, + "args": { + "External id": 461296,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452071780.111, "dur": 3.885, + "args": { + "External id": 461297,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452071817.391, "dur": 2.153, + "args": { + "External id": 461298,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452071818.739, "dur": 0.722, + "args": { + "External id": 461299,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452071820.851, "dur": 21.496, + "args": { + "External id": 461300,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452071824.401, "dur": 1.950, + "args": { + "External id": 461301,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452071943.112, "dur": 31.780, + "args": { + "External id": 461302,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452071997.911, "dur": 22.368, + "args": { + "External id": 461303,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452072029.512, "dur": 99.453, + "args": { + "External id": 461304,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452072140.624, "dur": 51.129, + "args": { + "External id": 461305,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452072205.865, "dur": 27.624, + "args": { + "External id": 461306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452072240.788, "dur": 36.644, + "args": { + "External id": 461307,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452072286.399, "dur": 33.233, + "args": { + "External id": 461308,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452072327.859, "dur": 36.214, + "args": { + "External id": 461309,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267452072391.085, "dur": 28.478, + "args": { + "External id": 461310,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452072441.779, "dur": 30.283, + "args": { + "External id": 461311,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452072492.051, "dur": 22.477, + "args": { + "External id": 461312,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452072573.420, "dur": 33.671, + "args": { + "External id": 461313,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267452072630.631, "dur": 25.896, + "args": { + "External id": 461314,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072748.482, "dur": 17.701, + "args": { + "External id": 461315,"Record function id": 0, "Ev Idx": 4098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072752.502, "dur": 12.569, + "args": { + "External id": 461316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072757.466, "dur": 6.372, + "args": { + "External id": 461317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072758.975, "dur": 4.732, + "args": { + "External id": 461318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072770.825, "dur": 6.103, + "args": { + "External id": 461319,"Record function id": 0, "Ev Idx": 4102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072773.079, "dur": 3.207, + "args": { + "External id": 461320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072774.032, "dur": 1.435, + "args": { + "External id": 461321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072774.411, "dur": 0.950, + "args": { + "External id": 461322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072781.087, "dur": 8.042, + "args": { + "External id": 461323,"Record function id": 0, "Ev Idx": 4106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072782.590, "dur": 6.007, + "args": { + "External id": 461324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072783.187, "dur": 4.912, + "args": { + "External id": 461325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072784.205, "dur": 3.775, + "args": { + "External id": 461326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072793.093, "dur": 4.894, + "args": { + "External id": 461327,"Record function id": 0, "Ev Idx": 4110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072794.649, "dur": 2.851, + "args": { + "External id": 461328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072795.274, "dur": 1.741, + "args": { + "External id": 461329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072795.908, "dur": 1.035, + "args": { + "External id": 461330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072801.817, "dur": 4.785, + "args": { + "External id": 461331,"Record function id": 0, "Ev Idx": 4114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072803.524, "dur": 2.559, + "args": { + "External id": 461332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072804.050, "dur": 1.472, + "args": { + "External id": 461333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072804.563, "dur": 0.878, + "args": { + "External id": 461334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072810.534, "dur": 5.106, + "args": { + "External id": 461335,"Record function id": 0, "Ev Idx": 4118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072812.154, "dur": 2.966, + "args": { + "External id": 461336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072812.767, "dur": 1.653, + "args": { + "External id": 461337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072813.589, "dur": 0.746, + "args": { + "External id": 461338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072819.728, "dur": 4.437, + "args": { + "External id": 461339,"Record function id": 0, "Ev Idx": 4122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072821.484, "dur": 2.172, + "args": { + "External id": 461340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072822.019, "dur": 1.149, + "args": { + "External id": 461341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072822.348, "dur": 0.746, + "args": { + "External id": 461342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072828.054, "dur": 6.479, + "args": { + "External id": 461343,"Record function id": 0, "Ev Idx": 4126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072829.404, "dur": 4.660, + "args": { + "External id": 461344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072829.927, "dur": 3.659, + "args": { + "External id": 461345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072832.715, "dur": 0.750, + "args": { + "External id": 461346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072838.460, "dur": 4.364, + "args": { + "External id": 461347,"Record function id": 0, "Ev Idx": 4130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452072840.126, "dur": 2.218, + "args": { + "External id": 461348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072840.674, "dur": 1.172, + "args": { + "External id": 461349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452072841.100, "dur": 0.642, + "args": { + "External id": 461350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452072847.946, "dur": 65992.804, + "args": { + "External id": 461351,"Record function id": 0, "Sequence number": 5036387, "Fwd thread id": 1, "Ev Idx": 4134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452072850.037, "dur": 65979.338, + "args": { + "External id": 461352,"Sequence number": 5036387, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4135 + } + }, + { + "ph": "f", "id": 203, "pid": 2338707, "tid": 2379456, "ts": 6267452072850.037, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2338707, "tid": 2379456, + "ts": 6267452072886.607, "dur": 48.707, + "args": { + "External id": 461353,"Record function id": 0, "Ev Idx": 4136 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2338707, "tid": 2379456, + "ts": 6267452072945.280, "dur": 80.989, + "args": { + "External id": 461354,"Record function id": 0, "Ev Idx": 4137 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2338707, "tid": 2379456, + "ts": 6267452073033.633, "dur": 65786.113, + "args": { + "External id": 461355,"Record function id": 0, "Ev Idx": 4138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452073186.422, "dur": 10.056, + "args": { + "External id": 461356,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452073225.052, "dur": 9.559, + "args": { + "External id": 461357,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452073259.273, "dur": 64488.853, + "args": { + "External id": 461358,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452073276.806, "dur": 64456.626, + "args": { + "External id": 461359,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452073390.095, "dur": 23.280, + "args": { + "External id": 461360,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452073440.961, "dur": 64236.438, + "args": { + "External id": 461361,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452073447.230, "dur": 64229.072, + "args": { + "External id": 461362,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452073452.768, "dur": 12.283, + "args": { + "External id": 461363,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452073467.838, "dur": 64203.979, + "args": { + "External id": 461364,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452137878.105, "dur": 14.283, + "args": { + "External id": 461365,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452137882.168, "dur": 9.673, + "args": { + "External id": 461366,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452137929.460, "dur": 502.479, + "args": { + "External id": 461367,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452137969.157, "dur": 455.793, + "args": { + "External id": 461368,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4151, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452137983.948, "dur": 432.680, + "args": { + "External id": 461369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452138468.216, "dur": 2.972, + "args": { + "External id": 461370,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4153, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452138547.381, "dur": 8.241, + "args": { + "External id": 461371,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452138629.705, "dur": 2.908, + "args": { + "External id": 461372,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452138652.098, "dur": 3.967, + "args": { + "External id": 461373,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452138670.581, "dur": 1.232, + "args": { + "External id": 461374,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452138687.886, "dur": 0.997, + "args": { + "External id": 461375,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452138701.232, "dur": 1.012, + "args": { + "External id": 461376,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452138715.027, "dur": 3.154, + "args": { + "External id": 461377,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452138730.728, "dur": 2.635, + "args": { + "External id": 461378,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452138745.464, "dur": 0.883, + "args": { + "External id": 461379,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452138859.855, "dur": 3676.068, + "args": { + "External id": 461380,"Record function id": 0, "Ev Idx": 4163 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2338707, "tid": 2379456, + "ts": 6267452138884.840, "dur": 1405.472, + "args": { + "External id": 461381,"Record function id": 0, "Ev Idx": 4164 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338707, "tid": 2379456, + "ts": 6267452138902.905, "dur": 461.924, + "args": { + "External id": 461382,"Record function id": 0, "Ev Idx": 4165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452139012.160, "dur": 5.223, + "args": { + "External id": 461383,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452139021.107, "dur": 1.013, + "args": { + "External id": 461384,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452139024.387, "dur": 3.082, + "args": { + "External id": 461385,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452139029.462, "dur": 0.909, + "args": { + "External id": 461386,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452139032.045, "dur": 0.881, + "args": { + "External id": 461387,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452139034.588, "dur": 0.875, + "args": { + "External id": 461388,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452139037.531, "dur": 1.994, + "args": { + "External id": 461389,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452139043.531, "dur": 1.225, + "args": { + "External id": 461390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452139050.440, "dur": 38.085, + "args": { + "External id": 461391,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452139093.805, "dur": 1.237, + "args": { + "External id": 461392,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452139118.996, "dur": 209.100, + "args": { + "External id": 461393,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452139138.826, "dur": 184.070, + "args": { + "External id": 461394,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452139173.156, "dur": 19.025, + "args": { + "External id": 461395,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452139198.583, "dur": 86.700, + "args": { + "External id": 461396,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452139201.457, "dur": 83.501, + "args": { + "External id": 461397,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139206.287, "dur": 7.219, + "args": { + "External id": 461398,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452139218.812, "dur": 65.075, + "args": { + "External id": 461399,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4182 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2338707, "tid": 2379456, + "ts": 6267452139471.362, "dur": 809.231, + "args": { + "External id": 461400,"Record function id": 0, "Ev Idx": 4183 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338707, "tid": 2379456, + "ts": 6267452139491.878, "dur": 772.571, + "args": { + "External id": 461401,"Record function id": 0, "Ev Idx": 4184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452139566.438, "dur": 6.440, + "args": { + "External id": 461402,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452139610.177, "dur": 42.742, + "args": { + "External id": 461403,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139617.115, "dur": 2.174, + "args": { + "External id": 461404,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139622.084, "dur": 2.612, + "args": { + "External id": 461405,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139626.384, "dur": 0.473, + "args": { + "External id": 461406,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139628.504, "dur": 0.372, + "args": { + "External id": 461407,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139632.368, "dur": 0.403, + "args": { + "External id": 461408,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139634.501, "dur": 2.983, + "args": { + "External id": 461409,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139639.005, "dur": 0.642, + "args": { + "External id": 461410,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139643.589, "dur": 0.642, + "args": { + "External id": 461411,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139645.898, "dur": 0.373, + "args": { + "External id": 461412,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452139667.651, "dur": 54.831, + "args": { + "External id": 461413,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267452139763.365, "dur": 137.893, + "args": { + "External id": 461414,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452139776.179, "dur": 5.219, + "args": { + "External id": 461415,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267452139787.770, "dur": 11.918, + "args": { + "External id": 461416,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267452139792.718, "dur": 6.415, + "args": { + "External id": 461417,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139796.917, "dur": 0.795, + "args": { + "External id": 461418,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452139808.530, "dur": 33.145, + "args": { + "External id": 461419,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139811.592, "dur": 0.616, + "args": { + "External id": 461420,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139815.436, "dur": 0.681, + "args": { + "External id": 461421,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139817.576, "dur": 2.965, + "args": { + "External id": 461422,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139822.772, "dur": 2.257, + "args": { + "External id": 461423,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139826.923, "dur": 0.486, + "args": { + "External id": 461424,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139828.858, "dur": 0.420, + "args": { + "External id": 461425,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139832.485, "dur": 0.267, + "args": { + "External id": 461426,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139834.568, "dur": 0.476, + "args": { + "External id": 461427,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452139836.508, "dur": 0.285, + "args": { + "External id": 461428,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452139854.939, "dur": 37.378, + "args": { + "External id": 461429,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452139955.964, "dur": 214.635, + "args": { + "External id": 461430,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452139996.038, "dur": 169.515, + "args": { + "External id": 461431,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4214, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452140014.205, "dur": 141.608, + "args": { + "External id": 461432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452140196.425, "dur": 2.704, + "args": { + "External id": 461433,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4216, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452140299.475, "dur": 2210.038, + "args": { + "External id": 461434,"Sequence number": 5036386, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4217 + } + }, + { + "ph": "f", "id": 204, "pid": 2338707, "tid": 2379456, "ts": 6267452140299.475, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452140438.273, "dur": 132.636, + "args": { + "External id": 461435,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452140647.420, "dur": 57.417, + "args": { + "External id": 461436,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267452140728.581, "dur": 75.607, + "args": { + "External id": 461437,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452140819.406, "dur": 38.127, + "args": { + "External id": 461438,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452140865.139, "dur": 39.605, + "args": { + "External id": 461439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452140913.401, "dur": 32.501, + "args": { + "External id": 461440,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452140954.851, "dur": 33.531, + "args": { + "External id": 461441,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452141021.371, "dur": 29.462, + "args": { + "External id": 461442,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452141117.920, "dur": 36.918, + "args": { + "External id": 461443,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452141188.179, "dur": 23.952, + "args": { + "External id": 461444,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452141230.148, "dur": 17.274, + "args": { + "External id": 461445,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452141257.922, "dur": 52.656, + "args": { + "External id": 461446,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452141315.244, "dur": 39.934, + "args": { + "External id": 461447,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267452141393.416, "dur": 362.782, + "args": { + "External id": 461448,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452141507.481, "dur": 9.389, + "args": { + "External id": 461449,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452141520.090, "dur": 3.173, + "args": { + "External id": 461450,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452141525.155, "dur": 2.255, + "args": { + "External id": 461451,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452141529.038, "dur": 2.103, + "args": { + "External id": 461452,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452141610.921, "dur": 8.087, + "args": { + "External id": 461453,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452141613.477, "dur": 5.047, + "args": { + "External id": 461454,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452141621.711, "dur": 48.170, + "args": { + "External id": 461455,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452141629.067, "dur": 4.187, + "args": { + "External id": 461456,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452141674.499, "dur": 2.189, + "args": { + "External id": 461457,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452141675.533, "dur": 1.080, + "args": { + "External id": 461458,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452141678.063, "dur": 20.625, + "args": { + "External id": 461459,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452141682.197, "dur": 0.533, + "args": { + "External id": 461460,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452141803.477, "dur": 33.860, + "args": { + "External id": 461461,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452141859.599, "dur": 21.480, + "args": { + "External id": 461462,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452141890.224, "dur": 59.106, + "args": { + "External id": 461463,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452141956.789, "dur": 48.590, + "args": { + "External id": 461464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452142017.578, "dur": 26.006, + "args": { + "External id": 461465,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452142049.825, "dur": 81.262, + "args": { + "External id": 461466,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452142144.529, "dur": 34.403, + "args": { + "External id": 461467,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452142186.668, "dur": 35.345, + "args": { + "External id": 461468,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267452142247.333, "dur": 33.677, + "args": { + "External id": 461469,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452142303.228, "dur": 33.327, + "args": { + "External id": 461470,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452142357.454, "dur": 21.992, + "args": { + "External id": 461471,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452142418.038, "dur": 18.375, + "args": { + "External id": 461472,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267452142452.605, "dur": 20.805, + "args": { + "External id": 461473,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142564.123, "dur": 19.759, + "args": { + "External id": 461474,"Record function id": 0, "Ev Idx": 4257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142568.013, "dur": 14.515, + "args": { + "External id": 461475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142573.045, "dur": 8.208, + "args": { + "External id": 461476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142574.785, "dur": 6.340, + "args": { + "External id": 461477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142588.812, "dur": 27.297, + "args": { + "External id": 461478,"Record function id": 0, "Ev Idx": 4261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142590.925, "dur": 23.848, + "args": { + "External id": 461479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142610.321, "dur": 3.387, + "args": { + "External id": 461480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142611.498, "dur": 1.879, + "args": { + "External id": 461481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142622.574, "dur": 8.346, + "args": { + "External id": 461482,"Record function id": 0, "Ev Idx": 4265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142624.471, "dur": 5.934, + "args": { + "External id": 461483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142625.400, "dur": 4.406, + "args": { + "External id": 461484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142626.202, "dur": 3.529, + "args": { + "External id": 461485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142635.053, "dur": 5.553, + "args": { + "External id": 461486,"Record function id": 0, "Ev Idx": 4269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142637.190, "dur": 2.901, + "args": { + "External id": 461487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142638.075, "dur": 1.478, + "args": { + "External id": 461488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142638.700, "dur": 0.754, + "args": { + "External id": 461489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142644.582, "dur": 7.851, + "args": { + "External id": 461490,"Record function id": 0, "Ev Idx": 4273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142646.446, "dur": 5.427, + "args": { + "External id": 461491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142647.246, "dur": 4.061, + "args": { + "External id": 461492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142650.400, "dur": 0.769, + "args": { + "External id": 461493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142656.573, "dur": 5.008, + "args": { + "External id": 461494,"Record function id": 0, "Ev Idx": 4277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142658.331, "dur": 2.744, + "args": { + "External id": 461495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142659.055, "dur": 1.335, + "args": { + "External id": 461496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142659.583, "dur": 0.729, + "args": { + "External id": 461497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142665.968, "dur": 4.979, + "args": { + "External id": 461498,"Record function id": 0, "Ev Idx": 4281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142667.678, "dur": 2.762, + "args": { + "External id": 461499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142668.263, "dur": 1.683, + "args": { + "External id": 461500,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142669.074, "dur": 0.792, + "args": { + "External id": 461501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142674.962, "dur": 5.051, + "args": { + "External id": 461502,"Record function id": 0, "Ev Idx": 4285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142676.733, "dur": 2.760, + "args": { + "External id": 461503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142677.523, "dur": 1.462, + "args": { + "External id": 461504,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142678.190, "dur": 0.716, + "args": { + "External id": 461505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142683.910, "dur": 6.888, + "args": { + "External id": 461506,"Record function id": 0, "Ev Idx": 4289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452142685.472, "dur": 4.815, + "args": { + "External id": 461507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142685.996, "dur": 3.628, + "args": { + "External id": 461508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452142688.763, "dur": 0.753, + "args": { + "External id": 461509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452142696.135, "dur": 63649.375, + "args": { + "External id": 461510,"Record function id": 0, "Sequence number": 5036385, "Fwd thread id": 1, "Ev Idx": 4293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452142698.221, "dur": 63636.237, + "args": { + "External id": 461511,"Sequence number": 5036385, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4294 + } + }, + { + "ph": "f", "id": 205, "pid": 2338707, "tid": 2379456, "ts": 6267452142698.221, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2338707, "tid": 2379456, + "ts": 6267452142735.839, "dur": 48.066, + "args": { + "External id": 461512,"Record function id": 0, "Ev Idx": 4295 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2338707, "tid": 2379456, + "ts": 6267452142793.894, "dur": 81.014, + "args": { + "External id": 461513,"Record function id": 0, "Ev Idx": 4296 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2338707, "tid": 2379456, + "ts": 6267452142885.909, "dur": 63438.300, + "args": { + "External id": 461514,"Record function id": 0, "Ev Idx": 4297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452142995.570, "dur": 8.144, + "args": { + "External id": 461515,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452143014.929, "dur": 8.188, + "args": { + "External id": 461516,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452143040.469, "dur": 62224.566, + "args": { + "External id": 461517,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452143097.106, "dur": 62150.960, + "args": { + "External id": 461518,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452143223.870, "dur": 26.469, + "args": { + "External id": 461519,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452143279.994, "dur": 61916.699, + "args": { + "External id": 461520,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452143283.581, "dur": 61912.104, + "args": { + "External id": 461521,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452143289.310, "dur": 16.723, + "args": { + "External id": 461522,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452143311.778, "dur": 61881.646, + "args": { + "External id": 461523,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452205390.930, "dur": 12.467, + "args": { + "External id": 461524,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452205394.802, "dur": 8.037, + "args": { + "External id": 461525,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452205438.634, "dur": 475.730, + "args": { + "External id": 461526,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452205481.746, "dur": 426.069, + "args": { + "External id": 461527,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4310, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452205497.872, "dur": 402.886, + "args": { + "External id": 461528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452205944.608, "dur": 2.942, + "args": { + "External id": 461529,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4312, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452206023.259, "dur": 8.125, + "args": { + "External id": 461530,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452206126.573, "dur": 3.019, + "args": { + "External id": 461531,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452206150.247, "dur": 4.423, + "args": { + "External id": 461532,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452206169.751, "dur": 1.389, + "args": { + "External id": 461533,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452206184.647, "dur": 1.069, + "args": { + "External id": 461534,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452206198.896, "dur": 1.158, + "args": { + "External id": 461535,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452206214.832, "dur": 3.705, + "args": { + "External id": 461536,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452206230.537, "dur": 2.673, + "args": { + "External id": 461537,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452206245.655, "dur": 1.098, + "args": { + "External id": 461538,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452206364.961, "dur": 3650.224, + "args": { + "External id": 461539,"Record function id": 0, "Ev Idx": 4322 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2338707, "tid": 2379456, + "ts": 6267452206389.960, "dur": 1404.309, + "args": { + "External id": 461540,"Record function id": 0, "Ev Idx": 4323 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338707, "tid": 2379456, + "ts": 6267452206407.870, "dur": 451.314, + "args": { + "External id": 461541,"Record function id": 0, "Ev Idx": 4324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452206519.754, "dur": 5.421, + "args": { + "External id": 461542,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452206529.463, "dur": 1.246, + "args": { + "External id": 461543,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452206532.835, "dur": 3.120, + "args": { + "External id": 461544,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452206538.181, "dur": 0.832, + "args": { + "External id": 461545,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452206543.033, "dur": 0.869, + "args": { + "External id": 461546,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452206545.687, "dur": 1.047, + "args": { + "External id": 461547,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452206548.581, "dur": 2.417, + "args": { + "External id": 461548,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452206553.322, "dur": 1.162, + "args": { + "External id": 461549,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452206558.300, "dur": 0.982, + "args": { + "External id": 461550,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452206561.265, "dur": 0.849, + "args": { + "External id": 461551,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452206583.875, "dur": 233.874, + "args": { + "External id": 461552,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452206622.221, "dur": 185.682, + "args": { + "External id": 461553,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452206658.317, "dur": 19.899, + "args": { + "External id": 461554,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452206682.799, "dur": 88.072, + "args": { + "External id": 461555,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452206685.813, "dur": 84.622, + "args": { + "External id": 461556,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452206691.019, "dur": 6.791, + "args": { + "External id": 461557,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452206702.418, "dur": 67.138, + "args": { + "External id": 461558,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2338707, "tid": 2379456, + "ts": 6267452206975.442, "dur": 809.848, + "args": { + "External id": 461559,"Record function id": 0, "Ev Idx": 4342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338707, "tid": 2379456, + "ts": 6267452206997.904, "dur": 771.348, + "args": { + "External id": 461560,"Record function id": 0, "Ev Idx": 4343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452207123.357, "dur": 8.545, + "args": { + "External id": 461561,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452207152.458, "dur": 42.603, + "args": { + "External id": 461562,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207159.048, "dur": 4.493, + "args": { + "External id": 461563,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207166.022, "dur": 0.639, + "args": { + "External id": 461564,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207168.989, "dur": 0.588, + "args": { + "External id": 461565,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207173.105, "dur": 0.369, + "args": { + "External id": 461566,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207175.587, "dur": 0.390, + "args": { + "External id": 461567,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207177.493, "dur": 2.828, + "args": { + "External id": 461568,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207183.394, "dur": 0.396, + "args": { + "External id": 461569,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207185.309, "dur": 0.445, + "args": { + "External id": 461570,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207187.435, "dur": 1.834, + "args": { + "External id": 461571,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452207209.115, "dur": 60.371, + "args": { + "External id": 461572,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267452207312.298, "dur": 138.256, + "args": { + "External id": 461573,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452207324.621, "dur": 4.206, + "args": { + "External id": 461574,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267452207335.038, "dur": 11.993, + "args": { + "External id": 461575,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267452207340.229, "dur": 6.242, + "args": { + "External id": 461576,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207344.267, "dur": 0.772, + "args": { + "External id": 461577,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452207354.804, "dur": 35.471, + "args": { + "External id": 461578,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207357.586, "dur": 0.547, + "args": { + "External id": 461579,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207360.174, "dur": 0.620, + "args": { + "External id": 461580,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207367.176, "dur": 3.841, + "args": { + "External id": 461581,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207372.596, "dur": 0.408, + "args": { + "External id": 461582,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207374.654, "dur": 0.480, + "args": { + "External id": 461583,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207378.100, "dur": 0.424, + "args": { + "External id": 461584,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207380.387, "dur": 0.324, + "args": { + "External id": 461585,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207382.171, "dur": 0.480, + "args": { + "External id": 461586,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452207385.523, "dur": 0.438, + "args": { + "External id": 461587,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452207405.248, "dur": 35.799, + "args": { + "External id": 461588,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452207507.904, "dur": 170.227, + "args": { + "External id": 461589,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452207545.626, "dur": 127.407, + "args": { + "External id": 461590,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4373, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452207557.432, "dur": 109.887, + "args": { + "External id": 461591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452207701.187, "dur": 2.414, + "args": { + "External id": 461592,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4375, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452207802.989, "dur": 2183.932, + "args": { + "External id": 461593,"Sequence number": 5036384, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4376 + } + }, + { + "ph": "f", "id": 206, "pid": 2338707, "tid": 2379456, "ts": 6267452207802.989, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452207940.957, "dur": 179.216, + "args": { + "External id": 461594,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452208187.496, "dur": 50.933, + "args": { + "External id": 461595,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267452208261.235, "dur": 72.600, + "args": { + "External id": 461596,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452208349.464, "dur": 38.574, + "args": { + "External id": 461597,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452208395.330, "dur": 39.711, + "args": { + "External id": 461598,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452208442.469, "dur": 33.136, + "args": { + "External id": 461599,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452208483.241, "dur": 33.885, + "args": { + "External id": 461600,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452208550.364, "dur": 28.502, + "args": { + "External id": 461601,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452208620.269, "dur": 43.614, + "args": { + "External id": 461602,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452208697.669, "dur": 24.353, + "args": { + "External id": 461603,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452208738.795, "dur": 17.700, + "args": { + "External id": 461604,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452208765.628, "dur": 50.463, + "args": { + "External id": 461605,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452208820.726, "dur": 40.067, + "args": { + "External id": 461606,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267452208917.678, "dur": 354.805, + "args": { + "External id": 461607,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452209020.882, "dur": 7.489, + "args": { + "External id": 461608,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452209031.477, "dur": 3.772, + "args": { + "External id": 461609,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452209036.772, "dur": 2.236, + "args": { + "External id": 461610,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452209040.745, "dur": 7.044, + "args": { + "External id": 461611,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452209142.683, "dur": 9.319, + "args": { + "External id": 461612,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452209147.724, "dur": 3.789, + "args": { + "External id": 461613,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452209154.552, "dur": 41.481, + "args": { + "External id": 461614,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452209161.949, "dur": 4.743, + "args": { + "External id": 461615,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452209198.032, "dur": 1.874, + "args": { + "External id": 461616,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452209199.147, "dur": 0.656, + "args": { + "External id": 461617,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452209200.932, "dur": 16.218, + "args": { + "External id": 461618,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452209203.222, "dur": 0.481, + "args": { + "External id": 461619,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452209320.203, "dur": 32.107, + "args": { + "External id": 461620,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452209375.947, "dur": 19.428, + "args": { + "External id": 461621,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452209404.640, "dur": 59.902, + "args": { + "External id": 461622,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452209472.057, "dur": 48.020, + "args": { + "External id": 461623,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452209533.055, "dur": 25.136, + "args": { + "External id": 461624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452209565.198, "dur": 55.006, + "args": { + "External id": 461625,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452209636.884, "dur": 35.938, + "args": { + "External id": 461626,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452209681.088, "dur": 37.516, + "args": { + "External id": 461627,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267452209746.884, "dur": 28.752, + "args": { + "External id": 461628,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452209798.785, "dur": 32.233, + "args": { + "External id": 461629,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452209853.082, "dur": 20.323, + "args": { + "External id": 461630,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452209897.660, "dur": 16.732, + "args": { + "External id": 461631,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267452209929.577, "dur": 21.613, + "args": { + "External id": 461632,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210041.968, "dur": 60.948, + "args": { + "External id": 461633,"Record function id": 0, "Ev Idx": 4416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210046.282, "dur": 54.433, + "args": { + "External id": 461634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210051.485, "dur": 47.115, + "args": { + "External id": 461635,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210053.047, "dur": 44.903, + "args": { + "External id": 461636,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210111.701, "dur": 7.567, + "args": { + "External id": 461637,"Record function id": 0, "Ev Idx": 4420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210114.298, "dur": 4.404, + "args": { + "External id": 461638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210116.219, "dur": 1.771, + "args": { + "External id": 461639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210116.911, "dur": 0.977, + "args": { + "External id": 461640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210123.779, "dur": 8.373, + "args": { + "External id": 461641,"Record function id": 0, "Ev Idx": 4424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210125.522, "dur": 6.084, + "args": { + "External id": 461642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210126.540, "dur": 4.525, + "args": { + "External id": 461643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210127.441, "dur": 3.478, + "args": { + "External id": 461644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210136.297, "dur": 5.209, + "args": { + "External id": 461645,"Record function id": 0, "Ev Idx": 4428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210137.862, "dur": 3.141, + "args": { + "External id": 461646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210138.739, "dur": 1.480, + "args": { + "External id": 461647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210139.253, "dur": 0.886, + "args": { + "External id": 461648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210145.696, "dur": 5.012, + "args": { + "External id": 461649,"Record function id": 0, "Ev Idx": 4432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210147.795, "dur": 2.408, + "args": { + "External id": 461650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210148.455, "dur": 1.185, + "args": { + "External id": 461651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210148.951, "dur": 0.610, + "args": { + "External id": 461652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210154.533, "dur": 4.978, + "args": { + "External id": 461653,"Record function id": 0, "Ev Idx": 4436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210156.282, "dur": 2.720, + "args": { + "External id": 461654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210157.231, "dur": 1.251, + "args": { + "External id": 461655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210157.656, "dur": 0.745, + "args": { + "External id": 461656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210163.686, "dur": 7.177, + "args": { + "External id": 461657,"Record function id": 0, "Ev Idx": 4440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210165.326, "dur": 5.015, + "args": { + "External id": 461658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210165.956, "dur": 3.754, + "args": { + "External id": 461659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210168.758, "dur": 0.841, + "args": { + "External id": 461660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210174.817, "dur": 5.131, + "args": { + "External id": 461661,"Record function id": 0, "Ev Idx": 4444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210176.639, "dur": 2.759, + "args": { + "External id": 461662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210177.675, "dur": 1.244, + "args": { + "External id": 461663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210177.999, "dur": 0.820, + "args": { + "External id": 461664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210183.875, "dur": 4.171, + "args": { + "External id": 461665,"Record function id": 0, "Ev Idx": 4448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452210185.158, "dur": 2.362, + "args": { + "External id": 461666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210185.686, "dur": 1.350, + "args": { + "External id": 461667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452210186.192, "dur": 0.764, + "args": { + "External id": 461668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452210193.604, "dur": 60962.809, + "args": { + "External id": 461669,"Record function id": 0, "Sequence number": 5036383, "Fwd thread id": 1, "Ev Idx": 4452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452210195.756, "dur": 60948.865, + "args": { + "External id": 461670,"Sequence number": 5036383, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4453 + } + }, + { + "ph": "f", "id": 207, "pid": 2338707, "tid": 2379456, "ts": 6267452210195.756, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2338707, "tid": 2379456, + "ts": 6267452210232.761, "dur": 46.009, + "args": { + "External id": 461671,"Record function id": 0, "Ev Idx": 4454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2338707, "tid": 2379456, + "ts": 6267452210287.852, "dur": 79.382, + "args": { + "External id": 461672,"Record function id": 0, "Ev Idx": 4455 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2338707, "tid": 2379456, + "ts": 6267452210374.215, "dur": 60759.507, + "args": { + "External id": 461673,"Record function id": 0, "Ev Idx": 4456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452210480.890, "dur": 9.604, + "args": { + "External id": 461674,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452210502.622, "dur": 7.692, + "args": { + "External id": 461675,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452210531.088, "dur": 59464.158, + "args": { + "External id": 461676,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452210547.715, "dur": 59430.629, + "args": { + "External id": 461677,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452210699.227, "dur": 30.534, + "args": { + "External id": 461678,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452210756.066, "dur": 59169.016, + "args": { + "External id": 461679,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452210759.747, "dur": 59164.183, + "args": { + "External id": 461680,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452210766.885, "dur": 14.363, + "args": { + "External id": 461681,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452210783.740, "dur": 59134.381, + "args": { + "External id": 461682,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452270155.107, "dur": 14.047, + "args": { + "External id": 461683,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452270159.063, "dur": 9.301, + "args": { + "External id": 461684,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452270205.682, "dur": 518.068, + "args": { + "External id": 461685,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452270244.966, "dur": 471.405, + "args": { + "External id": 461686,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4469, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452270261.477, "dur": 447.234, + "args": { + "External id": 461687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452270756.032, "dur": 2.852, + "args": { + "External id": 461688,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4471, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452270838.359, "dur": 7.991, + "args": { + "External id": 461689,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452270900.885, "dur": 1.771, + "args": { + "External id": 461690,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452270924.902, "dur": 4.137, + "args": { + "External id": 461691,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452270942.687, "dur": 0.892, + "args": { + "External id": 461692,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452270958.751, "dur": 1.264, + "args": { + "External id": 461693,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452270971.868, "dur": 0.861, + "args": { + "External id": 461694,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452270987.808, "dur": 3.475, + "args": { + "External id": 461695,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452271003.971, "dur": 2.562, + "args": { + "External id": 461696,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452271018.282, "dur": 0.921, + "args": { + "External id": 461697,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452271177.413, "dur": 3645.862, + "args": { + "External id": 461698,"Record function id": 0, "Ev Idx": 4481 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2338707, "tid": 2379456, + "ts": 6267452271203.742, "dur": 1340.645, + "args": { + "External id": 461699,"Record function id": 0, "Ev Idx": 4482 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338707, "tid": 2379456, + "ts": 6267452271221.650, "dur": 445.634, + "args": { + "External id": 461700,"Record function id": 0, "Ev Idx": 4483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452271331.929, "dur": 5.986, + "args": { + "External id": 461701,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452271342.426, "dur": 0.782, + "args": { + "External id": 461702,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452271345.651, "dur": 3.184, + "args": { + "External id": 461703,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452271351.232, "dur": 1.075, + "args": { + "External id": 461704,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452271354.090, "dur": 4.309, + "args": { + "External id": 461705,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452271359.894, "dur": 0.933, + "args": { + "External id": 461706,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452271362.685, "dur": 2.155, + "args": { + "External id": 461707,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452271368.695, "dur": 0.790, + "args": { + "External id": 461708,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452271371.237, "dur": 0.824, + "args": { + "External id": 461709,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452271373.874, "dur": 0.601, + "args": { + "External id": 461710,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452271396.345, "dur": 230.412, + "args": { + "External id": 461711,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452271415.665, "dur": 203.127, + "args": { + "External id": 461712,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452271450.467, "dur": 22.189, + "args": { + "External id": 461713,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452271479.453, "dur": 84.658, + "args": { + "External id": 461714,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452271482.736, "dur": 81.047, + "args": { + "External id": 461715,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452271487.750, "dur": 6.056, + "args": { + "External id": 461716,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452271495.754, "dur": 67.179, + "args": { + "External id": 461717,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4500 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2338707, "tid": 2379456, + "ts": 6267452271774.522, "dur": 760.728, + "args": { + "External id": 461718,"Record function id": 0, "Ev Idx": 4501 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338707, "tid": 2379456, + "ts": 6267452271794.141, "dur": 725.495, + "args": { + "External id": 461719,"Record function id": 0, "Ev Idx": 4502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452271868.019, "dur": 6.859, + "args": { + "External id": 461720,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452271893.577, "dur": 38.853, + "args": { + "External id": 461721,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452271899.909, "dur": 2.991, + "args": { + "External id": 461722,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452271905.404, "dur": 0.565, + "args": { + "External id": 461723,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452271907.526, "dur": 0.487, + "args": { + "External id": 461724,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452271911.254, "dur": 0.437, + "args": { + "External id": 461725,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452271913.318, "dur": 0.490, + "args": { + "External id": 461726,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452271915.694, "dur": 2.839, + "args": { + "External id": 461727,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452271921.245, "dur": 0.477, + "args": { + "External id": 461728,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452271923.591, "dur": 0.438, + "args": { + "External id": 461729,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452271925.542, "dur": 1.499, + "args": { + "External id": 461730,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452271944.935, "dur": 50.870, + "args": { + "External id": 461731,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267452272032.931, "dur": 197.709, + "args": { + "External id": 461732,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452272048.650, "dur": 5.291, + "args": { + "External id": 461733,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267452272105.068, "dur": 14.303, + "args": { + "External id": 461734,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267452272110.295, "dur": 8.563, + "args": { + "External id": 461735,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452272115.431, "dur": 1.070, + "args": { + "External id": 461736,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452272130.533, "dur": 35.123, + "args": { + "External id": 461737,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452272133.323, "dur": 0.432, + "args": { + "External id": 461738,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452272135.784, "dur": 0.618, + "args": { + "External id": 461739,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452272138.065, "dur": 4.262, + "args": { + "External id": 461740,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452272143.748, "dur": 0.602, + "args": { + "External id": 461741,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452272146.011, "dur": 0.668, + "args": { + "External id": 461742,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452272149.838, "dur": 0.508, + "args": { + "External id": 461743,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452272155.898, "dur": 0.311, + "args": { + "External id": 461744,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452272158.011, "dur": 0.366, + "args": { + "External id": 461745,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452272160.905, "dur": 0.409, + "args": { + "External id": 461746,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452272179.371, "dur": 41.152, + "args": { + "External id": 461747,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452272286.990, "dur": 144.729, + "args": { + "External id": 461748,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452272322.943, "dur": 104.304, + "args": { + "External id": 461749,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4532, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452272334.724, "dur": 87.375, + "args": { + "External id": 461750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452272454.895, "dur": 2.307, + "args": { + "External id": 461751,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4534, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452272553.497, "dur": 2242.634, + "args": { + "External id": 461752,"Sequence number": 5036382, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4535 + } + }, + { + "ph": "f", "id": 208, "pid": 2338707, "tid": 2379456, "ts": 6267452272553.497, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452272715.328, "dur": 134.254, + "args": { + "External id": 461753,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452272901.433, "dur": 49.501, + "args": { + "External id": 461754,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267452272990.342, "dur": 108.440, + "args": { + "External id": 461755,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452273121.182, "dur": 45.790, + "args": { + "External id": 461756,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452273174.544, "dur": 38.978, + "args": { + "External id": 461757,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452273221.163, "dur": 31.982, + "args": { + "External id": 461758,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452273260.877, "dur": 33.456, + "args": { + "External id": 461759,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452273333.775, "dur": 29.427, + "args": { + "External id": 461760,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452273386.805, "dur": 32.580, + "args": { + "External id": 461761,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452273445.914, "dur": 23.935, + "args": { + "External id": 461762,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452273486.073, "dur": 18.515, + "args": { + "External id": 461763,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452273513.533, "dur": 45.613, + "args": { + "External id": 461764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452273563.094, "dur": 60.800, + "args": { + "External id": 461765,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267452273665.546, "dur": 372.359, + "args": { + "External id": 461766,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452273789.094, "dur": 12.493, + "args": { + "External id": 461767,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452273804.607, "dur": 3.047, + "args": { + "External id": 461768,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452273809.557, "dur": 1.783, + "args": { + "External id": 461769,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452273813.034, "dur": 3.213, + "args": { + "External id": 461770,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452273877.703, "dur": 10.325, + "args": { + "External id": 461771,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452273884.239, "dur": 3.548, + "args": { + "External id": 461772,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452273892.395, "dur": 60.535, + "args": { + "External id": 461773,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452273899.586, "dur": 4.165, + "args": { + "External id": 461774,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452273954.808, "dur": 2.020, + "args": { + "External id": 461775,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452273955.926, "dur": 0.813, + "args": { + "External id": 461776,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452273957.939, "dur": 24.356, + "args": { + "External id": 461777,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452273962.632, "dur": 0.477, + "args": { + "External id": 461778,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452274131.663, "dur": 37.433, + "args": { + "External id": 461779,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452274192.835, "dur": 22.359, + "args": { + "External id": 461780,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452274224.958, "dur": 64.584, + "args": { + "External id": 461781,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452274298.035, "dur": 50.633, + "args": { + "External id": 461782,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452274361.389, "dur": 26.340, + "args": { + "External id": 461783,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452274394.168, "dur": 37.175, + "args": { + "External id": 461784,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452274439.629, "dur": 32.730, + "args": { + "External id": 461785,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452274482.332, "dur": 33.866, + "args": { + "External id": 461786,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267452274540.269, "dur": 28.637, + "args": { + "External id": 461787,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452274589.855, "dur": 48.682, + "args": { + "External id": 461788,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452274662.777, "dur": 21.231, + "args": { + "External id": 461789,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452274706.307, "dur": 17.601, + "args": { + "External id": 461790,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267452274741.016, "dur": 18.326, + "args": { + "External id": 461791,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274850.460, "dur": 18.478, + "args": { + "External id": 461792,"Record function id": 0, "Ev Idx": 4575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274854.815, "dur": 12.835, + "args": { + "External id": 461793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274859.776, "dur": 6.570, + "args": { + "External id": 461794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274861.505, "dur": 4.710, + "args": { + "External id": 461795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274873.823, "dur": 6.016, + "args": { + "External id": 461796,"Record function id": 0, "Ev Idx": 4579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274876.148, "dur": 3.178, + "args": { + "External id": 461797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274877.095, "dur": 1.600, + "args": { + "External id": 461798,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274877.803, "dur": 0.789, + "args": { + "External id": 461799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274883.997, "dur": 7.540, + "args": { + "External id": 461800,"Record function id": 0, "Ev Idx": 4583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274885.767, "dur": 5.237, + "args": { + "External id": 461801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274886.463, "dur": 4.003, + "args": { + "External id": 461802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274887.119, "dur": 3.222, + "args": { + "External id": 461803,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274895.524, "dur": 5.331, + "args": { + "External id": 461804,"Record function id": 0, "Ev Idx": 4587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274897.430, "dur": 2.926, + "args": { + "External id": 461805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274898.167, "dur": 1.639, + "args": { + "External id": 461806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274898.743, "dur": 0.959, + "args": { + "External id": 461807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274904.766, "dur": 5.151, + "args": { + "External id": 461808,"Record function id": 0, "Ev Idx": 4591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274906.470, "dur": 2.957, + "args": { + "External id": 461809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274907.289, "dur": 1.649, + "args": { + "External id": 461810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274907.964, "dur": 0.890, + "args": { + "External id": 461811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274913.821, "dur": 5.460, + "args": { + "External id": 461812,"Record function id": 0, "Ev Idx": 4595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274915.411, "dur": 3.368, + "args": { + "External id": 461813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274916.698, "dur": 1.576, + "args": { + "External id": 461814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274917.496, "dur": 0.692, + "args": { + "External id": 461815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274923.464, "dur": 4.901, + "args": { + "External id": 461816,"Record function id": 0, "Ev Idx": 4599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274924.912, "dur": 2.959, + "args": { + "External id": 461817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274925.671, "dur": 1.694, + "args": { + "External id": 461818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274926.212, "dur": 1.077, + "args": { + "External id": 461819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274932.350, "dur": 6.217, + "args": { + "External id": 461820,"Record function id": 0, "Ev Idx": 4603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274933.816, "dur": 4.258, + "args": { + "External id": 461821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274934.377, "dur": 3.213, + "args": { + "External id": 461822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274936.710, "dur": 0.800, + "args": { + "External id": 461823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274942.436, "dur": 43.498, + "args": { + "External id": 461824,"Record function id": 0, "Ev Idx": 4607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452274982.718, "dur": 2.548, + "args": { + "External id": 461825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274983.472, "dur": 1.119, + "args": { + "External id": 461826,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452274983.866, "dur": 0.645, + "args": { + "External id": 461827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452274999.485, "dur": 60749.213, + "args": { + "External id": 461828,"Record function id": 0, "Sequence number": 5036381, "Fwd thread id": 1, "Ev Idx": 4611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452275001.977, "dur": 60734.500, + "args": { + "External id": 461829,"Sequence number": 5036381, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4612 + } + }, + { + "ph": "f", "id": 209, "pid": 2338707, "tid": 2379456, "ts": 6267452275001.977, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2338707, "tid": 2379456, + "ts": 6267452275038.066, "dur": 89.940, + "args": { + "External id": 461830,"Record function id": 0, "Ev Idx": 4613 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2338707, "tid": 2379456, + "ts": 6267452275139.751, "dur": 85.381, + "args": { + "External id": 461831,"Record function id": 0, "Ev Idx": 4614 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2338707, "tid": 2379456, + "ts": 6267452275232.804, "dur": 60494.661, + "args": { + "External id": 461832,"Record function id": 0, "Ev Idx": 4615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452275342.027, "dur": 9.438, + "args": { + "External id": 461833,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452275364.916, "dur": 7.956, + "args": { + "External id": 461834,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452275390.092, "dur": 59366.689, + "args": { + "External id": 461835,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452275409.956, "dur": 59330.862, + "args": { + "External id": 461836,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452275528.152, "dur": 28.627, + "args": { + "External id": 461837,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452275581.678, "dur": 59104.062, + "args": { + "External id": 461838,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452275585.255, "dur": 59099.572, + "args": { + "External id": 461839,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452275590.692, "dur": 41.231, + "args": { + "External id": 461840,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452275635.539, "dur": 59045.094, + "args": { + "External id": 461841,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452334890.282, "dur": 13.352, + "args": { + "External id": 461842,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452334894.201, "dur": 8.941, + "args": { + "External id": 461843,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452334940.945, "dur": 406.912, + "args": { + "External id": 461844,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452334981.063, "dur": 360.086, + "args": { + "External id": 461845,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4628, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452334996.082, "dur": 337.132, + "args": { + "External id": 461846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452335377.180, "dur": 2.674, + "args": { + "External id": 461847,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4630, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452335453.577, "dur": 7.744, + "args": { + "External id": 461848,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452335515.797, "dur": 1.792, + "args": { + "External id": 461849,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452335536.087, "dur": 4.248, + "args": { + "External id": 461850,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452335554.230, "dur": 1.124, + "args": { + "External id": 461851,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452335570.269, "dur": 1.159, + "args": { + "External id": 461852,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452335585.467, "dur": 1.378, + "args": { + "External id": 461853,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452335620.937, "dur": 4.359, + "args": { + "External id": 461854,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452335639.749, "dur": 2.477, + "args": { + "External id": 461855,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452335654.235, "dur": 0.661, + "args": { + "External id": 461856,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452335768.274, "dur": 3698.952, + "args": { + "External id": 461857,"Record function id": 0, "Ev Idx": 4640 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2338707, "tid": 2379456, + "ts": 6267452335792.006, "dur": 1402.742, + "args": { + "External id": 461858,"Record function id": 0, "Ev Idx": 4641 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338707, "tid": 2379456, + "ts": 6267452335809.266, "dur": 467.033, + "args": { + "External id": 461859,"Record function id": 0, "Ev Idx": 4642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452335914.541, "dur": 5.230, + "args": { + "External id": 461860,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452335923.665, "dur": 0.952, + "args": { + "External id": 461861,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452335926.954, "dur": 3.648, + "args": { + "External id": 461862,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452335934.671, "dur": 0.913, + "args": { + "External id": 461863,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452335937.398, "dur": 0.868, + "args": { + "External id": 461864,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452335939.969, "dur": 0.988, + "args": { + "External id": 461865,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452335942.951, "dur": 2.007, + "args": { + "External id": 461866,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452335949.185, "dur": 1.197, + "args": { + "External id": 461867,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452335952.245, "dur": 1.218, + "args": { + "External id": 461868,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452335955.191, "dur": 0.837, + "args": { + "External id": 461869,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452335977.881, "dur": 256.751, + "args": { + "External id": 461870,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452335996.969, "dur": 230.092, + "args": { + "External id": 461871,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452336031.018, "dur": 65.856, + "args": { + "External id": 461872,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452336108.296, "dur": 83.602, + "args": { + "External id": 461873,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452336111.532, "dur": 79.960, + "args": { + "External id": 461874,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336116.881, "dur": 7.713, + "args": { + "External id": 461875,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452336126.724, "dur": 64.045, + "args": { + "External id": 461876,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2338707, "tid": 2379456, + "ts": 6267452336387.061, "dur": 798.284, + "args": { + "External id": 461877,"Record function id": 0, "Ev Idx": 4660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338707, "tid": 2379456, + "ts": 6267452336412.229, "dur": 757.102, + "args": { + "External id": 461878,"Record function id": 0, "Ev Idx": 4661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452336490.162, "dur": 6.058, + "args": { + "External id": 461879,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452336516.133, "dur": 40.530, + "args": { + "External id": 461880,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336522.361, "dur": 4.374, + "args": { + "External id": 461881,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336528.954, "dur": 0.459, + "args": { + "External id": 461882,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336530.858, "dur": 0.659, + "args": { + "External id": 461883,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336534.800, "dur": 0.577, + "args": { + "External id": 461884,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336536.988, "dur": 0.373, + "args": { + "External id": 461885,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336538.809, "dur": 2.868, + "args": { + "External id": 461886,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336544.592, "dur": 0.492, + "args": { + "External id": 461887,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336547.058, "dur": 0.565, + "args": { + "External id": 461888,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336549.229, "dur": 1.986, + "args": { + "External id": 461889,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452336569.911, "dur": 71.578, + "args": { + "External id": 461890,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267452336685.674, "dur": 148.701, + "args": { + "External id": 461891,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452336699.587, "dur": 6.197, + "args": { + "External id": 461892,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267452336712.557, "dur": 13.594, + "args": { + "External id": 461893,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267452336717.749, "dur": 7.856, + "args": { + "External id": 461894,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336722.892, "dur": 0.795, + "args": { + "External id": 461895,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452336735.265, "dur": 32.709, + "args": { + "External id": 461896,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336738.208, "dur": 0.639, + "args": { + "External id": 461897,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336741.046, "dur": 0.515, + "args": { + "External id": 461898,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336743.009, "dur": 4.758, + "args": { + "External id": 461899,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336749.414, "dur": 0.682, + "args": { + "External id": 461900,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336751.570, "dur": 0.530, + "args": { + "External id": 461901,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336754.960, "dur": 0.350, + "args": { + "External id": 461902,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336756.975, "dur": 0.339, + "args": { + "External id": 461903,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336759.058, "dur": 0.339, + "args": { + "External id": 461904,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452336762.414, "dur": 0.629, + "args": { + "External id": 461905,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452336784.174, "dur": 40.707, + "args": { + "External id": 461906,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452336888.345, "dur": 143.502, + "args": { + "External id": 461907,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452336923.601, "dur": 104.200, + "args": { + "External id": 461908,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4691, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452336934.798, "dur": 88.015, + "args": { + "External id": 461909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452337096.311, "dur": 3.982, + "args": { + "External id": 461910,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4693, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452337204.597, "dur": 2236.475, + "args": { + "External id": 461911,"Sequence number": 5036380, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4694 + } + }, + { + "ph": "f", "id": 210, "pid": 2338707, "tid": 2379456, "ts": 6267452337204.597, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452337344.342, "dur": 134.748, + "args": { + "External id": 461912,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452337532.405, "dur": 49.932, + "args": { + "External id": 461913,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267452337624.798, "dur": 71.751, + "args": { + "External id": 461914,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452337712.700, "dur": 37.555, + "args": { + "External id": 461915,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452337775.011, "dur": 45.817, + "args": { + "External id": 461916,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452337829.024, "dur": 33.199, + "args": { + "External id": 461917,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452337875.330, "dur": 34.075, + "args": { + "External id": 461918,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452337949.380, "dur": 29.558, + "args": { + "External id": 461919,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452338006.641, "dur": 33.298, + "args": { + "External id": 461920,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452338111.100, "dur": 29.163, + "args": { + "External id": 461921,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452338158.284, "dur": 17.794, + "args": { + "External id": 461922,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452338187.947, "dur": 54.291, + "args": { + "External id": 461923,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452338247.325, "dur": 41.653, + "args": { + "External id": 461924,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267452338323.836, "dur": 361.264, + "args": { + "External id": 461925,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452338432.460, "dur": 9.772, + "args": { + "External id": 461926,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452338445.291, "dur": 3.501, + "args": { + "External id": 461927,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452338450.429, "dur": 3.920, + "args": { + "External id": 461928,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452338456.053, "dur": 3.975, + "args": { + "External id": 461929,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452338516.612, "dur": 9.440, + "args": { + "External id": 461930,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452338518.985, "dur": 6.845, + "args": { + "External id": 461931,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452338530.464, "dur": 43.324, + "args": { + "External id": 461932,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452338537.329, "dur": 4.586, + "args": { + "External id": 461933,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452338575.447, "dur": 2.292, + "args": { + "External id": 461934,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452338576.863, "dur": 0.743, + "args": { + "External id": 461935,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452338578.996, "dur": 45.250, + "args": { + "External id": 461936,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452338587.452, "dur": 0.880, + "args": { + "External id": 461937,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452338735.678, "dur": 34.227, + "args": { + "External id": 461938,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452338794.335, "dur": 22.093, + "args": { + "External id": 461939,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452338825.943, "dur": 61.179, + "args": { + "External id": 461940,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452338895.233, "dur": 46.893, + "args": { + "External id": 461941,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452338954.363, "dur": 25.873, + "args": { + "External id": 461942,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452338986.526, "dur": 37.704, + "args": { + "External id": 461943,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452339032.446, "dur": 74.666, + "args": { + "External id": 461944,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452339121.817, "dur": 41.843, + "args": { + "External id": 461945,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267452339190.898, "dur": 29.626, + "args": { + "External id": 461946,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452339241.983, "dur": 30.309, + "args": { + "External id": 461947,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452339295.089, "dur": 25.113, + "args": { + "External id": 461948,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452339340.999, "dur": 19.262, + "args": { + "External id": 461949,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267452339381.726, "dur": 20.426, + "args": { + "External id": 461950,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339495.048, "dur": 21.922, + "args": { + "External id": 461951,"Record function id": 0, "Ev Idx": 4734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339499.294, "dur": 16.505, + "args": { + "External id": 461952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339508.236, "dur": 6.367, + "args": { + "External id": 461953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339509.768, "dur": 4.682, + "args": { + "External id": 461954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339521.943, "dur": 6.179, + "args": { + "External id": 461955,"Record function id": 0, "Ev Idx": 4738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339524.169, "dur": 3.336, + "args": { + "External id": 461956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339525.116, "dur": 1.652, + "args": { + "External id": 461957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339525.655, "dur": 0.998, + "args": { + "External id": 461958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339532.391, "dur": 8.296, + "args": { + "External id": 461959,"Record function id": 0, "Ev Idx": 4742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339534.444, "dur": 5.727, + "args": { + "External id": 461960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339535.336, "dur": 4.319, + "args": { + "External id": 461961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339536.060, "dur": 3.487, + "args": { + "External id": 461962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339544.688, "dur": 5.868, + "args": { + "External id": 461963,"Record function id": 0, "Ev Idx": 4746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339546.634, "dur": 3.412, + "args": { + "External id": 461964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339547.683, "dur": 1.879, + "args": { + "External id": 461965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339548.362, "dur": 1.118, + "args": { + "External id": 461966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339554.469, "dur": 5.635, + "args": { + "External id": 461967,"Record function id": 0, "Ev Idx": 4750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339556.538, "dur": 3.050, + "args": { + "External id": 461968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339557.269, "dur": 1.611, + "args": { + "External id": 461969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339557.758, "dur": 1.040, + "args": { + "External id": 461970,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339567.602, "dur": 5.381, + "args": { + "External id": 461971,"Record function id": 0, "Ev Idx": 4754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339569.261, "dur": 3.232, + "args": { + "External id": 461972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339570.506, "dur": 1.435, + "args": { + "External id": 461973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339571.107, "dur": 0.749, + "args": { + "External id": 461974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339577.102, "dur": 4.685, + "args": { + "External id": 461975,"Record function id": 0, "Ev Idx": 4758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339578.706, "dur": 2.599, + "args": { + "External id": 461976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339579.415, "dur": 1.395, + "args": { + "External id": 461977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339579.756, "dur": 0.969, + "args": { + "External id": 461978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339585.914, "dur": 25.197, + "args": { + "External id": 461979,"Record function id": 0, "Ev Idx": 4762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339587.478, "dur": 22.044, + "args": { + "External id": 461980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339588.263, "dur": 19.999, + "args": { + "External id": 461981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339590.939, "dur": 16.787, + "args": { + "External id": 461982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339618.450, "dur": 6.593, + "args": { + "External id": 461983,"Record function id": 0, "Ev Idx": 4766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452339620.621, "dur": 3.886, + "args": { + "External id": 461984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339621.802, "dur": 2.230, + "args": { + "External id": 461985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452339622.716, "dur": 1.230, + "args": { + "External id": 461986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452339630.691, "dur": 63218.789, + "args": { + "External id": 461987,"Record function id": 0, "Sequence number": 5036379, "Fwd thread id": 1, "Ev Idx": 4770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452339632.987, "dur": 63204.962, + "args": { + "External id": 461988,"Sequence number": 5036379, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4771 + } + }, + { + "ph": "f", "id": 211, "pid": 2338707, "tid": 2379456, "ts": 6267452339632.987, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2338707, "tid": 2379456, + "ts": 6267452339669.633, "dur": 47.986, + "args": { + "External id": 461989,"Record function id": 0, "Ev Idx": 4772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2338707, "tid": 2379456, + "ts": 6267452339727.502, "dur": 84.540, + "args": { + "External id": 461990,"Record function id": 0, "Ev Idx": 4773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2338707, "tid": 2379456, + "ts": 6267452339819.656, "dur": 63008.240, + "args": { + "External id": 461991,"Record function id": 0, "Ev Idx": 4774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452339929.534, "dur": 8.618, + "args": { + "External id": 461992,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452339950.471, "dur": 8.045, + "args": { + "External id": 461993,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452339979.777, "dur": 61789.058, + "args": { + "External id": 461994,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452339998.771, "dur": 61753.655, + "args": { + "External id": 461995,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452340155.697, "dur": 34.338, + "args": { + "External id": 461996,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452340224.737, "dur": 61475.066, + "args": { + "External id": 461997,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452340228.490, "dur": 61469.954, + "args": { + "External id": 461998,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452340235.430, "dur": 17.481, + "args": { + "External id": 461999,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452340255.736, "dur": 61440.302, + "args": { + "External id": 462000,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452401893.297, "dur": 12.276, + "args": { + "External id": 462001,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452401896.957, "dur": 8.123, + "args": { + "External id": 462002,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452401944.280, "dur": 493.269, + "args": { + "External id": 462003,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452401984.580, "dur": 446.076, + "args": { + "External id": 462004,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4787, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452401999.741, "dur": 423.526, + "args": { + "External id": 462005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452402467.728, "dur": 3.057, + "args": { + "External id": 462006,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4789, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452402548.110, "dur": 8.397, + "args": { + "External id": 462007,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452402637.527, "dur": 2.297, + "args": { + "External id": 462008,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452402660.599, "dur": 3.948, + "args": { + "External id": 462009,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452402679.035, "dur": 1.285, + "args": { + "External id": 462010,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452402696.379, "dur": 1.275, + "args": { + "External id": 462011,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452402709.847, "dur": 1.291, + "args": { + "External id": 462012,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452402722.909, "dur": 3.680, + "args": { + "External id": 462013,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452402738.386, "dur": 2.625, + "args": { + "External id": 462014,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452402752.172, "dur": 0.708, + "args": { + "External id": 462015,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452402869.074, "dur": 3711.938, + "args": { + "External id": 462016,"Record function id": 0, "Ev Idx": 4799 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2338707, "tid": 2379456, + "ts": 6267452402895.856, "dur": 1419.783, + "args": { + "External id": 462017,"Record function id": 0, "Ev Idx": 4800 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338707, "tid": 2379456, + "ts": 6267452402915.280, "dur": 476.582, + "args": { + "External id": 462018,"Record function id": 0, "Ev Idx": 4801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452403028.730, "dur": 5.266, + "args": { + "External id": 462019,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452403037.822, "dur": 0.983, + "args": { + "External id": 462020,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452403041.006, "dur": 3.644, + "args": { + "External id": 462021,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452403046.879, "dur": 1.085, + "args": { + "External id": 462022,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452403049.764, "dur": 0.893, + "args": { + "External id": 462023,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452403091.936, "dur": 1.794, + "args": { + "External id": 462024,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452403097.707, "dur": 2.575, + "args": { + "External id": 462025,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452403104.977, "dur": 1.019, + "args": { + "External id": 462026,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452403108.202, "dur": 0.757, + "args": { + "External id": 462027,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452403111.071, "dur": 1.212, + "args": { + "External id": 462028,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452403136.483, "dur": 211.987, + "args": { + "External id": 462029,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452403157.133, "dur": 185.412, + "args": { + "External id": 462030,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452403193.719, "dur": 21.068, + "args": { + "External id": 462031,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452403219.177, "dur": 82.509, + "args": { + "External id": 462032,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452403221.972, "dur": 79.334, + "args": { + "External id": 462033,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403227.357, "dur": 7.313, + "args": { + "External id": 462034,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452403236.654, "dur": 63.836, + "args": { + "External id": 462035,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4818 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2338707, "tid": 2379456, + "ts": 6267452403505.137, "dur": 798.011, + "args": { + "External id": 462036,"Record function id": 0, "Ev Idx": 4819 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338707, "tid": 2379456, + "ts": 6267452403526.612, "dur": 761.727, + "args": { + "External id": 462037,"Record function id": 0, "Ev Idx": 4820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452403628.675, "dur": 8.385, + "args": { + "External id": 462038,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452403656.731, "dur": 41.120, + "args": { + "External id": 462039,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403663.585, "dur": 3.373, + "args": { + "External id": 462040,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403669.719, "dur": 0.679, + "args": { + "External id": 462041,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403672.318, "dur": 0.480, + "args": { + "External id": 462042,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403677.111, "dur": 0.427, + "args": { + "External id": 462043,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403679.202, "dur": 0.600, + "args": { + "External id": 462044,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403681.374, "dur": 2.866, + "args": { + "External id": 462045,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403686.549, "dur": 0.769, + "args": { + "External id": 462046,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403688.992, "dur": 0.433, + "args": { + "External id": 462047,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403690.910, "dur": 1.410, + "args": { + "External id": 462048,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452403710.840, "dur": 54.686, + "args": { + "External id": 462049,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267452403804.521, "dur": 130.471, + "args": { + "External id": 462050,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452403817.034, "dur": 5.218, + "args": { + "External id": 462051,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267452403828.467, "dur": 12.243, + "args": { + "External id": 462052,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267452403833.747, "dur": 6.442, + "args": { + "External id": 462053,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403838.049, "dur": 0.811, + "args": { + "External id": 462054,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452403849.700, "dur": 30.774, + "args": { + "External id": 462055,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403852.478, "dur": 0.462, + "args": { + "External id": 462056,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403855.090, "dur": 0.376, + "args": { + "External id": 462057,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403857.217, "dur": 3.138, + "args": { + "External id": 462058,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403862.225, "dur": 0.695, + "args": { + "External id": 462059,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403864.571, "dur": 0.521, + "args": { + "External id": 462060,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403868.202, "dur": 0.540, + "args": { + "External id": 462061,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403870.400, "dur": 0.482, + "args": { + "External id": 462062,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403872.494, "dur": 0.274, + "args": { + "External id": 462063,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452403875.682, "dur": 0.427, + "args": { + "External id": 462064,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452403893.108, "dur": 32.999, + "args": { + "External id": 462065,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452403990.244, "dur": 199.121, + "args": { + "External id": 462066,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452404025.982, "dur": 157.950, + "args": { + "External id": 462067,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4850, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452404039.229, "dur": 138.926, + "args": { + "External id": 462068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452404215.806, "dur": 2.547, + "args": { + "External id": 462069,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4852, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452404324.666, "dur": 2229.664, + "args": { + "External id": 462070,"Sequence number": 5036378, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4853 + } + }, + { + "ph": "f", "id": 212, "pid": 2338707, "tid": 2379456, "ts": 6267452404324.666, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452404460.382, "dur": 152.112, + "args": { + "External id": 462071,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452404690.348, "dur": 53.491, + "args": { + "External id": 462072,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267452404766.712, "dur": 71.425, + "args": { + "External id": 462073,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452404854.144, "dur": 37.751, + "args": { + "External id": 462074,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452404899.198, "dur": 39.105, + "args": { + "External id": 462075,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452404946.016, "dur": 36.886, + "args": { + "External id": 462076,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452404993.620, "dur": 36.921, + "args": { + "External id": 462077,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452405122.225, "dur": 33.042, + "args": { + "External id": 462078,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452405181.004, "dur": 35.384, + "args": { + "External id": 462079,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452405247.138, "dur": 23.258, + "args": { + "External id": 462080,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452405287.217, "dur": 17.418, + "args": { + "External id": 462081,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452405314.033, "dur": 50.850, + "args": { + "External id": 462082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452405369.733, "dur": 39.862, + "args": { + "External id": 462083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267452405469.057, "dur": 342.134, + "args": { + "External id": 462084,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452405564.220, "dur": 8.079, + "args": { + "External id": 462085,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452405575.808, "dur": 3.229, + "args": { + "External id": 462086,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452405580.735, "dur": 2.508, + "args": { + "External id": 462087,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452405584.766, "dur": 5.417, + "args": { + "External id": 462088,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452405674.968, "dur": 6.763, + "args": { + "External id": 462089,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452405677.420, "dur": 3.856, + "args": { + "External id": 462090,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452405686.093, "dur": 47.283, + "args": { + "External id": 462091,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452405693.550, "dur": 5.266, + "args": { + "External id": 462092,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452405735.154, "dur": 2.023, + "args": { + "External id": 462093,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452405736.421, "dur": 0.656, + "args": { + "External id": 462094,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452405738.268, "dur": 19.675, + "args": { + "External id": 462095,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452405741.192, "dur": 0.982, + "args": { + "External id": 462096,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452405863.296, "dur": 33.732, + "args": { + "External id": 462097,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452405924.498, "dur": 20.738, + "args": { + "External id": 462098,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452405955.114, "dur": 60.156, + "args": { + "External id": 462099,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452406022.466, "dur": 88.808, + "args": { + "External id": 462100,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452406129.397, "dur": 30.018, + "args": { + "External id": 462101,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452406166.187, "dur": 37.057, + "args": { + "External id": 462102,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452406211.508, "dur": 31.751, + "args": { + "External id": 462103,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452406250.866, "dur": 34.947, + "args": { + "External id": 462104,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267452406314.946, "dur": 30.700, + "args": { + "External id": 462105,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452406366.331, "dur": 30.721, + "args": { + "External id": 462106,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452406418.228, "dur": 22.939, + "args": { + "External id": 462107,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452406463.619, "dur": 18.360, + "args": { + "External id": 462108,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267452406498.499, "dur": 19.753, + "args": { + "External id": 462109,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406627.308, "dur": 19.667, + "args": { + "External id": 462110,"Record function id": 0, "Ev Idx": 4893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406631.551, "dur": 13.978, + "args": { + "External id": 462111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406636.626, "dur": 7.452, + "args": { + "External id": 462112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406638.738, "dur": 5.073, + "args": { + "External id": 462113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406652.200, "dur": 6.336, + "args": { + "External id": 462114,"Record function id": 0, "Ev Idx": 4897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406654.105, "dur": 3.890, + "args": { + "External id": 462115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406655.712, "dur": 1.765, + "args": { + "External id": 462116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406656.447, "dur": 0.952, + "args": { + "External id": 462117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406662.680, "dur": 7.988, + "args": { + "External id": 462118,"Record function id": 0, "Ev Idx": 4901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406664.307, "dur": 5.828, + "args": { + "External id": 462119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406665.120, "dur": 4.521, + "args": { + "External id": 462120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406666.005, "dur": 3.515, + "args": { + "External id": 462121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406674.814, "dur": 6.041, + "args": { + "External id": 462122,"Record function id": 0, "Ev Idx": 4905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406676.511, "dur": 3.818, + "args": { + "External id": 462123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406677.724, "dur": 2.129, + "args": { + "External id": 462124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406678.498, "dur": 1.264, + "args": { + "External id": 462125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406684.687, "dur": 5.309, + "args": { + "External id": 462126,"Record function id": 0, "Ev Idx": 4909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406686.484, "dur": 3.014, + "args": { + "External id": 462127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406687.288, "dur": 1.445, + "args": { + "External id": 462128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406687.792, "dur": 0.858, + "args": { + "External id": 462129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406693.790, "dur": 5.310, + "args": { + "External id": 462130,"Record function id": 0, "Ev Idx": 4913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406695.367, "dur": 3.167, + "args": { + "External id": 462131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406696.227, "dur": 1.569, + "args": { + "External id": 462132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406696.953, "dur": 0.762, + "args": { + "External id": 462133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406703.215, "dur": 4.462, + "args": { + "External id": 462134,"Record function id": 0, "Ev Idx": 4917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406704.485, "dur": 2.675, + "args": { + "External id": 462135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406705.308, "dur": 1.328, + "args": { + "External id": 462136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406705.850, "dur": 0.705, + "args": { + "External id": 462137,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406711.503, "dur": 6.785, + "args": { + "External id": 462138,"Record function id": 0, "Ev Idx": 4921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406712.895, "dur": 4.866, + "args": { + "External id": 462139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406713.872, "dur": 3.312, + "args": { + "External id": 462140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406716.344, "dur": 0.715, + "args": { + "External id": 462141,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406722.495, "dur": 4.777, + "args": { + "External id": 462142,"Record function id": 0, "Ev Idx": 4925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452406724.108, "dur": 2.667, + "args": { + "External id": 462143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406725.090, "dur": 1.177, + "args": { + "External id": 462144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452406725.543, "dur": 0.624, + "args": { + "External id": 462145,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452406732.616, "dur": 61690.381, + "args": { + "External id": 462146,"Record function id": 0, "Sequence number": 5036377, "Fwd thread id": 1, "Ev Idx": 4929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452406735.167, "dur": 61677.404, + "args": { + "External id": 462147,"Sequence number": 5036377, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4930 + } + }, + { + "ph": "f", "id": 213, "pid": 2338707, "tid": 2379456, "ts": 6267452406735.167, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2338707, "tid": 2379456, + "ts": 6267452406773.331, "dur": 46.330, + "args": { + "External id": 462148,"Record function id": 0, "Ev Idx": 4931 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2338707, "tid": 2379456, + "ts": 6267452406829.474, "dur": 78.990, + "args": { + "External id": 462149,"Record function id": 0, "Ev Idx": 4932 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2338707, "tid": 2379456, + "ts": 6267452406915.812, "dur": 61486.991, + "args": { + "External id": 462150,"Record function id": 0, "Ev Idx": 4933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452407026.762, "dur": 7.946, + "args": { + "External id": 462151,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452407046.667, "dur": 7.924, + "args": { + "External id": 462152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452407116.430, "dur": 60179.657, + "args": { + "External id": 462153,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452407134.437, "dur": 60145.477, + "args": { + "External id": 462154,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452407254.042, "dur": 29.968, + "args": { + "External id": 462155,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452407309.545, "dur": 59922.067, + "args": { + "External id": 462156,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452407316.134, "dur": 59914.650, + "args": { + "External id": 462157,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452407323.495, "dur": 14.827, + "args": { + "External id": 462158,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452407344.445, "dur": 59882.437, + "args": { + "External id": 462159,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452467424.326, "dur": 12.889, + "args": { + "External id": 462160,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452467428.187, "dur": 8.506, + "args": { + "External id": 462161,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452467474.513, "dur": 514.518, + "args": { + "External id": 462162,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452467513.795, "dur": 467.726, + "args": { + "External id": 462163,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4946, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452467530.160, "dur": 444.627, + "args": { + "External id": 462164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452468019.567, "dur": 2.943, + "args": { + "External id": 462165,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4948, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452468146.047, "dur": 8.751, + "args": { + "External id": 462166,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452468214.015, "dur": 1.696, + "args": { + "External id": 462167,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452468234.335, "dur": 4.526, + "args": { + "External id": 462168,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452468254.119, "dur": 1.199, + "args": { + "External id": 462169,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452468268.663, "dur": 0.955, + "args": { + "External id": 462170,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452468281.919, "dur": 1.190, + "args": { + "External id": 462171,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452468297.663, "dur": 3.669, + "args": { + "External id": 462172,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452468313.101, "dur": 2.637, + "args": { + "External id": 462173,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452468326.551, "dur": 0.927, + "args": { + "External id": 462174,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452468442.698, "dur": 3690.489, + "args": { + "External id": 462175,"Record function id": 0, "Ev Idx": 4958 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2338707, "tid": 2379456, + "ts": 6267452468467.536, "dur": 1394.953, + "args": { + "External id": 462176,"Record function id": 0, "Ev Idx": 4959 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338707, "tid": 2379456, + "ts": 6267452468485.458, "dur": 451.813, + "args": { + "External id": 462177,"Record function id": 0, "Ev Idx": 4960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452468618.610, "dur": 6.060, + "args": { + "External id": 462178,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452468629.182, "dur": 1.209, + "args": { + "External id": 462179,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452468632.764, "dur": 3.426, + "args": { + "External id": 462180,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452468640.745, "dur": 0.933, + "args": { + "External id": 462181,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452468643.599, "dur": 1.209, + "args": { + "External id": 462182,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452468646.790, "dur": 0.880, + "args": { + "External id": 462183,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452468649.558, "dur": 2.375, + "args": { + "External id": 462184,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452468655.518, "dur": 0.763, + "args": { + "External id": 462185,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452468658.535, "dur": 1.008, + "args": { + "External id": 462186,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452468662.150, "dur": 1.035, + "args": { + "External id": 462187,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452468684.577, "dur": 212.734, + "args": { + "External id": 462188,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452468707.087, "dur": 183.566, + "args": { + "External id": 462189,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452468742.530, "dur": 24.248, + "args": { + "External id": 462190,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452468773.234, "dur": 81.078, + "args": { + "External id": 462191,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452468776.298, "dur": 77.506, + "args": { + "External id": 462192,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452468781.197, "dur": 7.022, + "args": { + "External id": 462193,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452468790.404, "dur": 62.487, + "args": { + "External id": 462194,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4977 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2338707, "tid": 2379456, + "ts": 6267452469096.470, "dur": 756.866, + "args": { + "External id": 462195,"Record function id": 0, "Ev Idx": 4978 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338707, "tid": 2379456, + "ts": 6267452469123.125, "dur": 712.040, + "args": { + "External id": 462196,"Record function id": 0, "Ev Idx": 4979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452469207.606, "dur": 7.889, + "args": { + "External id": 462197,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452469234.725, "dur": 38.860, + "args": { + "External id": 462198,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469240.965, "dur": 3.167, + "args": { + "External id": 462199,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469246.538, "dur": 0.520, + "args": { + "External id": 462200,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469248.790, "dur": 0.621, + "args": { + "External id": 462201,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469252.712, "dur": 0.407, + "args": { + "External id": 462202,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469254.511, "dur": 0.514, + "args": { + "External id": 462203,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469256.583, "dur": 3.103, + "args": { + "External id": 462204,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469262.507, "dur": 0.413, + "args": { + "External id": 462205,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469264.552, "dur": 0.408, + "args": { + "External id": 462206,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469266.600, "dur": 1.472, + "args": { + "External id": 462207,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452469286.826, "dur": 54.969, + "args": { + "External id": 462208,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267452469380.152, "dur": 135.906, + "args": { + "External id": 462209,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452469393.164, "dur": 4.488, + "args": { + "External id": 462210,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267452469404.065, "dur": 12.007, + "args": { + "External id": 462211,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267452469409.099, "dur": 6.365, + "args": { + "External id": 462212,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469413.139, "dur": 0.788, + "args": { + "External id": 462213,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452469424.391, "dur": 30.915, + "args": { + "External id": 462214,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469427.595, "dur": 0.514, + "args": { + "External id": 462215,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469430.357, "dur": 0.395, + "args": { + "External id": 462216,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469432.420, "dur": 3.859, + "args": { + "External id": 462217,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469437.984, "dur": 0.688, + "args": { + "External id": 462218,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469440.085, "dur": 0.534, + "args": { + "External id": 462219,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469443.229, "dur": 0.309, + "args": { + "External id": 462220,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469445.148, "dur": 0.738, + "args": { + "External id": 462221,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469447.352, "dur": 0.367, + "args": { + "External id": 462222,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452469450.845, "dur": 0.347, + "args": { + "External id": 462223,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452469470.271, "dur": 36.305, + "args": { + "External id": 462224,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452469569.432, "dur": 169.794, + "args": { + "External id": 462225,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452469625.179, "dur": 109.205, + "args": { + "External id": 462226,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5009, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452469638.176, "dur": 91.254, + "args": { + "External id": 462227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452469763.702, "dur": 2.185, + "args": { + "External id": 462228,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5011, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452469871.914, "dur": 2232.914, + "args": { + "External id": 462229,"Sequence number": 5036376, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5012 + } + }, + { + "ph": "f", "id": 214, "pid": 2338707, "tid": 2379456, "ts": 6267452469871.914, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452470017.642, "dur": 179.233, + "args": { + "External id": 462230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452470255.640, "dur": 50.968, + "args": { + "External id": 462231,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267452470330.409, "dur": 67.243, + "args": { + "External id": 462232,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452470412.787, "dur": 40.835, + "args": { + "External id": 462233,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452470461.292, "dur": 38.150, + "args": { + "External id": 462234,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452470507.078, "dur": 31.607, + "args": { + "External id": 462235,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452470548.704, "dur": 31.977, + "args": { + "External id": 462236,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452470633.076, "dur": 30.623, + "args": { + "External id": 462237,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452470696.878, "dur": 32.595, + "args": { + "External id": 462238,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452470761.639, "dur": 24.432, + "args": { + "External id": 462239,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452470802.383, "dur": 17.602, + "args": { + "External id": 462240,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452470828.865, "dur": 50.375, + "args": { + "External id": 462241,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452470883.610, "dur": 37.887, + "args": { + "External id": 462242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267452470972.823, "dur": 371.927, + "args": { + "External id": 462243,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452471120.836, "dur": 11.551, + "args": { + "External id": 462244,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452471135.901, "dur": 3.083, + "args": { + "External id": 462245,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452471140.967, "dur": 2.455, + "args": { + "External id": 462246,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452471144.965, "dur": 4.163, + "args": { + "External id": 462247,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452471210.925, "dur": 6.249, + "args": { + "External id": 462248,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452471213.554, "dur": 3.350, + "args": { + "External id": 462249,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452471219.691, "dur": 44.399, + "args": { + "External id": 462250,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452471228.133, "dur": 4.486, + "args": { + "External id": 462251,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452471268.349, "dur": 2.089, + "args": { + "External id": 462252,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452471269.537, "dur": 0.777, + "args": { + "External id": 462253,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452471271.571, "dur": 18.359, + "args": { + "External id": 462254,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452471275.752, "dur": 0.505, + "args": { + "External id": 462255,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452471394.758, "dur": 31.114, + "args": { + "External id": 462256,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452471448.388, "dur": 22.311, + "args": { + "External id": 462257,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452471479.905, "dur": 60.338, + "args": { + "External id": 462258,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452471547.727, "dur": 77.522, + "args": { + "External id": 462259,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452471645.192, "dur": 31.053, + "args": { + "External id": 462260,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452471682.559, "dur": 39.223, + "args": { + "External id": 462261,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452471730.196, "dur": 32.163, + "args": { + "External id": 462262,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452471770.099, "dur": 36.268, + "args": { + "External id": 462263,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267452471833.586, "dur": 30.435, + "args": { + "External id": 462264,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452471885.404, "dur": 28.635, + "args": { + "External id": 462265,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452471932.261, "dur": 20.138, + "args": { + "External id": 462266,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452471974.053, "dur": 16.427, + "args": { + "External id": 462267,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267452472006.626, "dur": 19.802, + "args": { + "External id": 462268,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472161.409, "dur": 18.556, + "args": { + "External id": 462269,"Record function id": 0, "Ev Idx": 5052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472165.233, "dur": 13.573, + "args": { + "External id": 462270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472170.348, "dur": 7.144, + "args": { + "External id": 462271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472172.394, "dur": 4.979, + "args": { + "External id": 462272,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472185.000, "dur": 6.079, + "args": { + "External id": 462273,"Record function id": 0, "Ev Idx": 5056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472186.873, "dur": 3.626, + "args": { + "External id": 462274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472187.845, "dur": 2.069, + "args": { + "External id": 462275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472188.560, "dur": 1.248, + "args": { + "External id": 462276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472195.403, "dur": 8.572, + "args": { + "External id": 462277,"Record function id": 0, "Ev Idx": 5060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472197.101, "dur": 6.304, + "args": { + "External id": 462278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472198.167, "dur": 4.730, + "args": { + "External id": 462279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472198.941, "dur": 3.842, + "args": { + "External id": 462280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472207.932, "dur": 5.875, + "args": { + "External id": 462281,"Record function id": 0, "Ev Idx": 5064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472209.587, "dur": 3.708, + "args": { + "External id": 462282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472210.958, "dur": 1.814, + "args": { + "External id": 462283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472211.542, "dur": 1.153, + "args": { + "External id": 462284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472217.651, "dur": 4.949, + "args": { + "External id": 462285,"Record function id": 0, "Ev Idx": 5068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472219.273, "dur": 2.796, + "args": { + "External id": 462286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472220.149, "dur": 1.285, + "args": { + "External id": 462287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472220.573, "dur": 0.777, + "args": { + "External id": 462288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472226.522, "dur": 7.195, + "args": { + "External id": 462289,"Record function id": 0, "Ev Idx": 5072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472227.955, "dur": 5.272, + "args": { + "External id": 462290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472228.945, "dur": 3.757, + "args": { + "External id": 462291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472231.886, "dur": 0.735, + "args": { + "External id": 462292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472237.764, "dur": 4.609, + "args": { + "External id": 462293,"Record function id": 0, "Ev Idx": 5076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472239.151, "dur": 2.736, + "args": { + "External id": 462294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472239.860, "dur": 1.434, + "args": { + "External id": 462295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472240.484, "dur": 0.734, + "args": { + "External id": 462296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472246.194, "dur": 4.157, + "args": { + "External id": 462297,"Record function id": 0, "Ev Idx": 5080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472247.540, "dur": 2.318, + "args": { + "External id": 462298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472248.247, "dur": 1.107, + "args": { + "External id": 462299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472248.597, "dur": 0.674, + "args": { + "External id": 462300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472254.383, "dur": 4.356, + "args": { + "External id": 462301,"Record function id": 0, "Ev Idx": 5084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452472255.689, "dur": 2.550, + "args": { + "External id": 462302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472256.412, "dur": 1.320, + "args": { + "External id": 462303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452472256.847, "dur": 0.805, + "args": { + "External id": 462304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452472263.938, "dur": 61421.480, + "args": { + "External id": 462305,"Record function id": 0, "Sequence number": 5036375, "Fwd thread id": 1, "Ev Idx": 5088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452472266.028, "dur": 61406.980, + "args": { + "External id": 462306,"Sequence number": 5036375, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5089 + } + }, + { + "ph": "f", "id": 215, "pid": 2338707, "tid": 2379456, "ts": 6267452472266.028, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2338707, "tid": 2379456, + "ts": 6267452472305.636, "dur": 46.591, + "args": { + "External id": 462307,"Record function id": 0, "Ev Idx": 5090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2338707, "tid": 2379456, + "ts": 6267452472361.741, "dur": 78.560, + "args": { + "External id": 462308,"Record function id": 0, "Ev Idx": 5091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2338707, "tid": 2379456, + "ts": 6267452472447.656, "dur": 61215.273, + "args": { + "External id": 462309,"Record function id": 0, "Ev Idx": 5092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452472555.428, "dur": 8.678, + "args": { + "External id": 462310,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452472576.163, "dur": 8.156, + "args": { + "External id": 462311,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452472623.783, "dur": 59960.316, + "args": { + "External id": 462312,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452472642.248, "dur": 59925.349, + "args": { + "External id": 462313,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452472768.829, "dur": 26.357, + "args": { + "External id": 462314,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452472824.088, "dur": 59687.640, + "args": { + "External id": 462315,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452472827.648, "dur": 59683.198, + "args": { + "External id": 462316,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452472833.316, "dur": 17.336, + "args": { + "External id": 462317,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452472853.009, "dur": 59654.337, + "args": { + "External id": 462318,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452532727.656, "dur": 13.759, + "args": { + "External id": 462319,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452532731.859, "dur": 8.820, + "args": { + "External id": 462320,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452532779.936, "dur": 496.028, + "args": { + "External id": 462321,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452532819.347, "dur": 449.476, + "args": { + "External id": 462322,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5105, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452532834.811, "dur": 426.251, + "args": { + "External id": 462323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452533308.712, "dur": 2.925, + "args": { + "External id": 462324,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5107, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452533390.248, "dur": 8.013, + "args": { + "External id": 462325,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452533456.187, "dur": 1.571, + "args": { + "External id": 462326,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452533475.764, "dur": 3.773, + "args": { + "External id": 462327,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452533494.270, "dur": 1.128, + "args": { + "External id": 462328,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452533509.659, "dur": 4.170, + "args": { + "External id": 462329,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452533526.509, "dur": 0.887, + "args": { + "External id": 462330,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452533539.923, "dur": 3.267, + "args": { + "External id": 462331,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452533555.385, "dur": 2.885, + "args": { + "External id": 462332,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452533572.902, "dur": 0.956, + "args": { + "External id": 462333,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452533705.166, "dur": 3661.849, + "args": { + "External id": 462334,"Record function id": 0, "Ev Idx": 5117 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2338707, "tid": 2379456, + "ts": 6267452533728.363, "dur": 1429.468, + "args": { + "External id": 462335,"Record function id": 0, "Ev Idx": 5118 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338707, "tid": 2379456, + "ts": 6267452533745.620, "dur": 494.688, + "args": { + "External id": 462336,"Record function id": 0, "Ev Idx": 5119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452533890.059, "dur": 6.597, + "args": { + "External id": 462337,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452533901.754, "dur": 0.835, + "args": { + "External id": 462338,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452533904.748, "dur": 3.546, + "args": { + "External id": 462339,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452533912.416, "dur": 0.753, + "args": { + "External id": 462340,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452533914.924, "dur": 0.924, + "args": { + "External id": 462341,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452533917.484, "dur": 0.804, + "args": { + "External id": 462342,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452533920.403, "dur": 2.000, + "args": { + "External id": 462343,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452533926.274, "dur": 0.743, + "args": { + "External id": 462344,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452533928.786, "dur": 0.852, + "args": { + "External id": 462345,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452533931.397, "dur": 0.664, + "args": { + "External id": 462346,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452533953.385, "dur": 244.026, + "args": { + "External id": 462347,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452533972.889, "dur": 217.165, + "args": { + "External id": 462348,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452534006.163, "dur": 19.328, + "args": { + "External id": 462349,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452534029.478, "dur": 123.084, + "args": { + "External id": 462350,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452534032.219, "dur": 119.805, + "args": { + "External id": 462351,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534036.683, "dur": 6.358, + "args": { + "External id": 462352,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452534044.670, "dur": 105.727, + "args": { + "External id": 462353,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5136 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2338707, "tid": 2379456, + "ts": 6267452534355.701, "dur": 792.859, + "args": { + "External id": 462354,"Record function id": 0, "Ev Idx": 5137 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338707, "tid": 2379456, + "ts": 6267452534376.951, "dur": 756.048, + "args": { + "External id": 462355,"Record function id": 0, "Ev Idx": 5138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452534458.623, "dur": 6.906, + "args": { + "External id": 462356,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452534485.171, "dur": 39.590, + "args": { + "External id": 462357,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534491.652, "dur": 1.991, + "args": { + "External id": 462358,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534496.465, "dur": 2.656, + "args": { + "External id": 462359,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534500.759, "dur": 0.372, + "args": { + "External id": 462360,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534503.038, "dur": 0.593, + "args": { + "External id": 462361,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534506.741, "dur": 0.490, + "args": { + "External id": 462362,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534508.948, "dur": 3.403, + "args": { + "External id": 462363,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534514.002, "dur": 0.525, + "args": { + "External id": 462364,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534517.311, "dur": 0.574, + "args": { + "External id": 462365,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534519.237, "dur": 0.554, + "args": { + "External id": 462366,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452534537.223, "dur": 48.315, + "args": { + "External id": 462367,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267452534645.104, "dur": 150.805, + "args": { + "External id": 462368,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452534658.406, "dur": 5.503, + "args": { + "External id": 462369,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267452534670.671, "dur": 13.323, + "args": { + "External id": 462370,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267452534676.119, "dur": 7.393, + "args": { + "External id": 462371,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534680.547, "dur": 1.174, + "args": { + "External id": 462372,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452534692.809, "dur": 33.022, + "args": { + "External id": 462373,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534695.602, "dur": 0.437, + "args": { + "External id": 462374,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534700.662, "dur": 0.341, + "args": { + "External id": 462375,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534702.679, "dur": 3.067, + "args": { + "External id": 462376,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534707.486, "dur": 1.359, + "args": { + "External id": 462377,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534710.456, "dur": 0.361, + "args": { + "External id": 462378,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534712.843, "dur": 0.312, + "args": { + "External id": 462379,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534716.832, "dur": 0.336, + "args": { + "External id": 462380,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534718.809, "dur": 0.390, + "args": { + "External id": 462381,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452534721.031, "dur": 0.424, + "args": { + "External id": 462382,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452534743.825, "dur": 38.924, + "args": { + "External id": 462383,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452534856.591, "dur": 147.794, + "args": { + "External id": 462384,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452534894.621, "dur": 105.792, + "args": { + "External id": 462385,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5168, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452534907.444, "dur": 88.095, + "args": { + "External id": 462386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452535025.211, "dur": 2.451, + "args": { + "External id": 462387,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5170, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452535167.427, "dur": 2173.384, + "args": { + "External id": 462388,"Sequence number": 5036374, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5171 + } + }, + { + "ph": "f", "id": 216, "pid": 2338707, "tid": 2379456, "ts": 6267452535167.427, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452535307.000, "dur": 134.492, + "args": { + "External id": 462389,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452535496.272, "dur": 50.361, + "args": { + "External id": 462390,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267452535567.615, "dur": 85.939, + "args": { + "External id": 462391,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452535672.248, "dur": 40.974, + "args": { + "External id": 462392,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452535720.496, "dur": 40.352, + "args": { + "External id": 462393,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452535767.923, "dur": 32.717, + "args": { + "External id": 462394,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452535810.092, "dur": 34.939, + "args": { + "External id": 462395,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452535879.584, "dur": 31.157, + "args": { + "External id": 462396,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452535936.256, "dur": 35.386, + "args": { + "External id": 462397,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452536004.013, "dur": 21.505, + "args": { + "External id": 462398,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452536040.793, "dur": 55.911, + "args": { + "External id": 462399,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452536118.853, "dur": 59.865, + "args": { + "External id": 462400,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452536192.136, "dur": 55.404, + "args": { + "External id": 462401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267452536290.060, "dur": 334.977, + "args": { + "External id": 462402,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452536397.091, "dur": 7.284, + "args": { + "External id": 462403,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452536407.144, "dur": 2.886, + "args": { + "External id": 462404,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452536411.804, "dur": 2.755, + "args": { + "External id": 462405,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452536416.064, "dur": 3.486, + "args": { + "External id": 462406,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452536471.812, "dur": 5.689, + "args": { + "External id": 462407,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452536473.996, "dur": 3.286, + "args": { + "External id": 462408,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452536479.808, "dur": 44.905, + "args": { + "External id": 462409,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452536490.156, "dur": 4.978, + "args": { + "External id": 462410,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452536526.582, "dur": 4.960, + "args": { + "External id": 462411,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452536530.463, "dur": 0.916, + "args": { + "External id": 462412,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452536532.808, "dur": 20.622, + "args": { + "External id": 462413,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452536537.330, "dur": 0.560, + "args": { + "External id": 462414,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452536676.537, "dur": 36.890, + "args": { + "External id": 462415,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452536735.077, "dur": 21.033, + "args": { + "External id": 462416,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452536765.664, "dur": 54.259, + "args": { + "External id": 462417,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452536827.591, "dur": 44.816, + "args": { + "External id": 462418,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452536883.728, "dur": 26.099, + "args": { + "External id": 462419,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452536916.573, "dur": 36.609, + "args": { + "External id": 462420,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452536961.377, "dur": 32.309, + "args": { + "External id": 462421,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452537001.304, "dur": 34.386, + "args": { + "External id": 462422,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267452537100.360, "dur": 33.337, + "args": { + "External id": 462423,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452537158.352, "dur": 29.388, + "args": { + "External id": 462424,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452537208.610, "dur": 21.378, + "args": { + "External id": 462425,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452537251.316, "dur": 17.180, + "args": { + "External id": 462426,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267452537287.593, "dur": 17.523, + "args": { + "External id": 462427,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537393.973, "dur": 17.936, + "args": { + "External id": 462428,"Record function id": 0, "Ev Idx": 5211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537398.132, "dur": 12.556, + "args": { + "External id": 462429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537403.114, "dur": 6.315, + "args": { + "External id": 462430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537404.898, "dur": 4.417, + "args": { + "External id": 462431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537416.942, "dur": 5.662, + "args": { + "External id": 462432,"Record function id": 0, "Ev Idx": 5215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537418.763, "dur": 3.263, + "args": { + "External id": 462433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537419.745, "dur": 1.625, + "args": { + "External id": 462434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537420.306, "dur": 0.957, + "args": { + "External id": 462435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537426.715, "dur": 7.782, + "args": { + "External id": 462436,"Record function id": 0, "Ev Idx": 5219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537428.359, "dur": 5.567, + "args": { + "External id": 462437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537429.065, "dur": 4.251, + "args": { + "External id": 462438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537429.783, "dur": 3.405, + "args": { + "External id": 462439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537438.646, "dur": 5.116, + "args": { + "External id": 462440,"Record function id": 0, "Ev Idx": 5223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537440.310, "dur": 2.923, + "args": { + "External id": 462441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537440.947, "dur": 1.662, + "args": { + "External id": 462442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537441.446, "dur": 1.087, + "args": { + "External id": 462443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537447.570, "dur": 4.314, + "args": { + "External id": 462444,"Record function id": 0, "Ev Idx": 5227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537449.007, "dur": 2.391, + "args": { + "External id": 462445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537449.617, "dur": 1.223, + "args": { + "External id": 462446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537450.057, "dur": 0.702, + "args": { + "External id": 462447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537455.750, "dur": 4.679, + "args": { + "External id": 462448,"Record function id": 0, "Ev Idx": 5231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537457.286, "dur": 2.628, + "args": { + "External id": 462449,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537457.889, "dur": 1.482, + "args": { + "External id": 462450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537458.607, "dur": 0.683, + "args": { + "External id": 462451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537464.297, "dur": 4.500, + "args": { + "External id": 462452,"Record function id": 0, "Ev Idx": 5235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537465.697, "dur": 2.608, + "args": { + "External id": 462453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537466.522, "dur": 1.281, + "args": { + "External id": 462454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537466.962, "dur": 0.759, + "args": { + "External id": 462455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537472.537, "dur": 4.416, + "args": { + "External id": 462456,"Record function id": 0, "Ev Idx": 5239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537474.007, "dur": 2.465, + "args": { + "External id": 462457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537474.725, "dur": 1.219, + "args": { + "External id": 462458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537475.088, "dur": 0.775, + "args": { + "External id": 462459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537481.336, "dur": 6.644, + "args": { + "External id": 462460,"Record function id": 0, "Ev Idx": 5243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452537482.824, "dur": 4.663, + "args": { + "External id": 462461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537483.379, "dur": 3.624, + "args": { + "External id": 462462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452537486.120, "dur": 0.803, + "args": { + "External id": 462463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452537493.327, "dur": 61952.097, + "args": { + "External id": 462464,"Record function id": 0, "Sequence number": 5036373, "Fwd thread id": 1, "Ev Idx": 5247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452537495.278, "dur": 61938.691, + "args": { + "External id": 462465,"Sequence number": 5036373, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5248 + } + }, + { + "ph": "f", "id": 217, "pid": 2338707, "tid": 2379456, "ts": 6267452537495.278, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2338707, "tid": 2379456, + "ts": 6267452537529.689, "dur": 52.278, + "args": { + "External id": 462466,"Record function id": 0, "Ev Idx": 5249 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2338707, "tid": 2379456, + "ts": 6267452537608.830, "dur": 86.740, + "args": { + "External id": 462467,"Record function id": 0, "Ev Idx": 5250 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2338707, "tid": 2379456, + "ts": 6267452537707.707, "dur": 61716.260, + "args": { + "External id": 462468,"Record function id": 0, "Ev Idx": 5251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452537819.051, "dur": 8.886, + "args": { + "External id": 462469,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452537850.579, "dur": 9.963, + "args": { + "External id": 462470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452537885.106, "dur": 60420.840, + "args": { + "External id": 462471,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452537906.872, "dur": 60382.999, + "args": { + "External id": 462472,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452538021.024, "dur": 21.377, + "args": { + "External id": 462473,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452538112.623, "dur": 60124.850, + "args": { + "External id": 462474,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452538116.215, "dur": 60120.079, + "args": { + "External id": 462475,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452538122.568, "dur": 17.807, + "args": { + "External id": 462476,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452538146.432, "dur": 60084.482, + "args": { + "External id": 462477,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452598430.651, "dur": 13.231, + "args": { + "External id": 462478,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452598434.713, "dur": 8.715, + "args": { + "External id": 462479,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452598481.382, "dur": 517.407, + "args": { + "External id": 462480,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452598520.807, "dur": 471.029, + "args": { + "External id": 462481,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5264, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452598536.516, "dur": 447.293, + "args": { + "External id": 462482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452599031.212, "dur": 3.113, + "args": { + "External id": 462483,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5266, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452599165.319, "dur": 8.751, + "args": { + "External id": 462484,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452599230.185, "dur": 1.528, + "args": { + "External id": 462485,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452599251.128, "dur": 3.978, + "args": { + "External id": 462486,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452599269.524, "dur": 1.027, + "args": { + "External id": 462487,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452599287.666, "dur": 1.090, + "args": { + "External id": 462488,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452599301.918, "dur": 0.797, + "args": { + "External id": 462489,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452599317.180, "dur": 3.262, + "args": { + "External id": 462490,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452599331.830, "dur": 2.812, + "args": { + "External id": 462491,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452599345.964, "dur": 0.710, + "args": { + "External id": 462492,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452599464.169, "dur": 3708.278, + "args": { + "External id": 462493,"Record function id": 0, "Ev Idx": 5276 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2338707, "tid": 2379456, + "ts": 6267452599489.264, "dur": 1384.280, + "args": { + "External id": 462494,"Record function id": 0, "Ev Idx": 5277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338707, "tid": 2379456, + "ts": 6267452599507.441, "dur": 441.307, + "args": { + "External id": 462495,"Record function id": 0, "Ev Idx": 5278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452599641.404, "dur": 5.866, + "args": { + "External id": 462496,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452599651.615, "dur": 0.977, + "args": { + "External id": 462497,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452599654.664, "dur": 3.277, + "args": { + "External id": 462498,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452599659.949, "dur": 0.787, + "args": { + "External id": 462499,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452599664.711, "dur": 0.916, + "args": { + "External id": 462500,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452599667.451, "dur": 0.807, + "args": { + "External id": 462501,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452599670.252, "dur": 2.009, + "args": { + "External id": 462502,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452599674.108, "dur": 0.961, + "args": { + "External id": 462503,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452599679.578, "dur": 0.753, + "args": { + "External id": 462504,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452599681.953, "dur": 0.904, + "args": { + "External id": 462505,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452599705.185, "dur": 204.760, + "args": { + "External id": 462506,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452599724.413, "dur": 179.344, + "args": { + "External id": 462507,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452599757.813, "dur": 20.811, + "args": { + "External id": 462508,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452599783.031, "dur": 85.767, + "args": { + "External id": 462509,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452599788.984, "dur": 79.294, + "args": { + "External id": 462510,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452599793.555, "dur": 7.423, + "args": { + "External id": 462511,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452599802.980, "dur": 64.566, + "args": { + "External id": 462512,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5295 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2338707, "tid": 2379456, + "ts": 6267452600105.627, "dur": 758.201, + "args": { + "External id": 462513,"Record function id": 0, "Ev Idx": 5296 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338707, "tid": 2379456, + "ts": 6267452600128.679, "dur": 720.621, + "args": { + "External id": 462514,"Record function id": 0, "Ev Idx": 5297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452600216.850, "dur": 7.943, + "args": { + "External id": 462515,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452600244.686, "dur": 43.735, + "args": { + "External id": 462516,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600250.777, "dur": 3.689, + "args": { + "External id": 462517,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600263.600, "dur": 0.587, + "args": { + "External id": 462518,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600265.702, "dur": 0.410, + "args": { + "External id": 462519,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600268.861, "dur": 0.531, + "args": { + "External id": 462520,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600270.732, "dur": 0.356, + "args": { + "External id": 462521,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600272.326, "dur": 3.884, + "args": { + "External id": 462522,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600277.510, "dur": 0.452, + "args": { + "External id": 462523,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600279.001, "dur": 0.403, + "args": { + "External id": 462524,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600282.473, "dur": 0.374, + "args": { + "External id": 462525,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452600301.440, "dur": 58.282, + "args": { + "External id": 462526,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267452600398.220, "dur": 130.469, + "args": { + "External id": 462527,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452600410.434, "dur": 4.258, + "args": { + "External id": 462528,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267452600420.824, "dur": 11.895, + "args": { + "External id": 462529,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267452600425.966, "dur": 6.207, + "args": { + "External id": 462530,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600430.038, "dur": 0.707, + "args": { + "External id": 462531,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452600440.472, "dur": 27.392, + "args": { + "External id": 462532,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600442.566, "dur": 0.362, + "args": { + "External id": 462533,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600444.413, "dur": 0.359, + "args": { + "External id": 462534,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600447.325, "dur": 2.675, + "args": { + "External id": 462535,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600451.282, "dur": 0.426, + "args": { + "External id": 462536,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600452.880, "dur": 0.434, + "args": { + "External id": 462537,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600456.293, "dur": 0.413, + "args": { + "External id": 462538,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600458.035, "dur": 0.530, + "args": { + "External id": 462539,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600460.319, "dur": 1.873, + "args": { + "External id": 462540,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452600463.615, "dur": 0.432, + "args": { + "External id": 462541,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452600482.857, "dur": 36.530, + "args": { + "External id": 462542,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452600584.541, "dur": 175.634, + "args": { + "External id": 462543,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452600639.569, "dur": 115.858, + "args": { + "External id": 462544,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5327, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452600652.596, "dur": 97.288, + "args": { + "External id": 462545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452600779.429, "dur": 2.353, + "args": { + "External id": 462546,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5329, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452600882.855, "dur": 2262.679, + "args": { + "External id": 462547,"Sequence number": 5036372, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5330 + } + }, + { + "ph": "f", "id": 218, "pid": 2338707, "tid": 2379456, "ts": 6267452600882.855, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452601021.095, "dur": 193.435, + "args": { + "External id": 462548,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452601278.409, "dur": 53.048, + "args": { + "External id": 462549,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267452601353.842, "dur": 67.060, + "args": { + "External id": 462550,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452601435.924, "dur": 38.310, + "args": { + "External id": 462551,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452601481.917, "dur": 41.061, + "args": { + "External id": 462552,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452601530.956, "dur": 32.087, + "args": { + "External id": 462553,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452601571.548, "dur": 55.155, + "args": { + "External id": 462554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452601664.308, "dur": 32.921, + "args": { + "External id": 462555,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452601719.754, "dur": 31.425, + "args": { + "External id": 462556,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452601783.755, "dur": 21.802, + "args": { + "External id": 462557,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452601820.868, "dur": 19.320, + "args": { + "External id": 462558,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452601850.570, "dur": 46.186, + "args": { + "External id": 462559,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452601901.241, "dur": 38.130, + "args": { + "External id": 462560,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267452601982.546, "dur": 379.225, + "args": { + "External id": 462561,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452602141.214, "dur": 10.848, + "args": { + "External id": 462562,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452602155.751, "dur": 4.745, + "args": { + "External id": 462563,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452602162.225, "dur": 2.267, + "args": { + "External id": 462564,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452602166.048, "dur": 3.356, + "args": { + "External id": 462565,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452602233.554, "dur": 6.100, + "args": { + "External id": 462566,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452602235.950, "dur": 3.485, + "args": { + "External id": 462567,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452602241.974, "dur": 39.793, + "args": { + "External id": 462568,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452602248.803, "dur": 3.874, + "args": { + "External id": 462569,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452602283.372, "dur": 4.573, + "args": { + "External id": 462570,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452602286.961, "dur": 0.786, + "args": { + "External id": 462571,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452602288.922, "dur": 19.337, + "args": { + "External id": 462572,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452602292.159, "dur": 1.949, + "args": { + "External id": 462573,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452602414.582, "dur": 34.299, + "args": { + "External id": 462574,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452602471.944, "dur": 21.140, + "args": { + "External id": 462575,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452602502.583, "dur": 62.991, + "args": { + "External id": 462576,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452602573.818, "dur": 81.733, + "args": { + "External id": 462577,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452602672.166, "dur": 29.108, + "args": { + "External id": 462578,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452602708.086, "dur": 37.880, + "args": { + "External id": 462579,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452602754.868, "dur": 32.658, + "args": { + "External id": 462580,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452602797.382, "dur": 35.738, + "args": { + "External id": 462581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267452602863.221, "dur": 29.307, + "args": { + "External id": 462582,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452602919.840, "dur": 30.194, + "args": { + "External id": 462583,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452602968.426, "dur": 20.947, + "args": { + "External id": 462584,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452603013.448, "dur": 18.582, + "args": { + "External id": 462585,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267452603046.763, "dur": 58.413, + "args": { + "External id": 462586,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603201.248, "dur": 18.322, + "args": { + "External id": 462587,"Record function id": 0, "Ev Idx": 5370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603205.600, "dur": 12.901, + "args": { + "External id": 462588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603210.904, "dur": 6.362, + "args": { + "External id": 462589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603212.435, "dur": 4.679, + "args": { + "External id": 462590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603224.456, "dur": 5.289, + "args": { + "External id": 462591,"Record function id": 0, "Ev Idx": 5374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603226.354, "dur": 2.815, + "args": { + "External id": 462592,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603227.254, "dur": 1.360, + "args": { + "External id": 462593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603227.658, "dur": 0.874, + "args": { + "External id": 462594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603233.888, "dur": 8.440, + "args": { + "External id": 462595,"Record function id": 0, "Ev Idx": 5378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603235.632, "dur": 6.145, + "args": { + "External id": 462596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603236.268, "dur": 4.993, + "args": { + "External id": 462597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603237.185, "dur": 3.963, + "args": { + "External id": 462598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603246.248, "dur": 4.286, + "args": { + "External id": 462599,"Record function id": 0, "Ev Idx": 5382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603247.653, "dur": 2.368, + "args": { + "External id": 462600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603248.261, "dur": 1.247, + "args": { + "External id": 462601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603248.616, "dur": 0.808, + "args": { + "External id": 462602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603254.394, "dur": 4.687, + "args": { + "External id": 462603,"Record function id": 0, "Ev Idx": 5386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603256.038, "dur": 2.520, + "args": { + "External id": 462604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603256.630, "dur": 1.297, + "args": { + "External id": 462605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603257.128, "dur": 0.717, + "args": { + "External id": 462606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603262.880, "dur": 6.472, + "args": { + "External id": 462607,"Record function id": 0, "Ev Idx": 5390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603264.282, "dur": 4.556, + "args": { + "External id": 462608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603265.048, "dur": 3.202, + "args": { + "External id": 462609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603267.391, "dur": 0.722, + "args": { + "External id": 462610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603273.568, "dur": 4.648, + "args": { + "External id": 462611,"Record function id": 0, "Ev Idx": 5394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603275.044, "dur": 2.669, + "args": { + "External id": 462612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603275.994, "dur": 1.199, + "args": { + "External id": 462613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603276.352, "dur": 0.738, + "args": { + "External id": 462614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603281.967, "dur": 4.913, + "args": { + "External id": 462615,"Record function id": 0, "Ev Idx": 5398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603283.612, "dur": 2.620, + "args": { + "External id": 462616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603284.315, "dur": 1.348, + "args": { + "External id": 462617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603284.808, "dur": 0.762, + "args": { + "External id": 462618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603290.717, "dur": 4.555, + "args": { + "External id": 462619,"Record function id": 0, "Ev Idx": 5402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452603292.171, "dur": 2.595, + "args": { + "External id": 462620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603293.002, "dur": 1.258, + "args": { + "External id": 462621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452603293.479, "dur": 0.702, + "args": { + "External id": 462622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452603300.396, "dur": 64193.522, + "args": { + "External id": 462623,"Record function id": 0, "Sequence number": 5036371, "Fwd thread id": 1, "Ev Idx": 5406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452603301.981, "dur": 64180.517, + "args": { + "External id": 462624,"Sequence number": 5036371, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5407 + } + }, + { + "ph": "f", "id": 219, "pid": 2338707, "tid": 2379456, "ts": 6267452603301.981, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2338707, "tid": 2379456, + "ts": 6267452603341.123, "dur": 48.532, + "args": { + "External id": 462625,"Record function id": 0, "Ev Idx": 5408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2338707, "tid": 2379456, + "ts": 6267452603399.315, "dur": 78.394, + "args": { + "External id": 462626,"Record function id": 0, "Ev Idx": 5409 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2338707, "tid": 2379456, + "ts": 6267452603484.904, "dur": 63987.376, + "args": { + "External id": 462627,"Record function id": 0, "Ev Idx": 5410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452603615.117, "dur": 9.899, + "args": { + "External id": 462628,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452603639.982, "dur": 8.067, + "args": { + "External id": 462629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452603666.545, "dur": 62767.117, + "args": { + "External id": 462630,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452603686.021, "dur": 62731.388, + "args": { + "External id": 462631,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452603809.222, "dur": 23.801, + "args": { + "External id": 462632,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452603861.354, "dur": 62506.431, + "args": { + "External id": 462633,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452603866.997, "dur": 62499.433, + "args": { + "External id": 462634,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452603872.067, "dur": 12.948, + "args": { + "External id": 462635,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452603887.407, "dur": 62475.193, + "args": { + "External id": 462636,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452666558.402, "dur": 14.352, + "args": { + "External id": 462637,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452666562.501, "dur": 9.674, + "args": { + "External id": 462638,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452666622.085, "dur": 483.603, + "args": { + "External id": 462639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452666661.629, "dur": 436.543, + "args": { + "External id": 462640,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5423, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452666677.922, "dur": 375.295, + "args": { + "External id": 462641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452667138.043, "dur": 3.762, + "args": { + "External id": 462642,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5425, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452667221.674, "dur": 8.094, + "args": { + "External id": 462643,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452667285.602, "dur": 1.591, + "args": { + "External id": 462644,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452667305.770, "dur": 3.710, + "args": { + "External id": 462645,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452667324.092, "dur": 1.036, + "args": { + "External id": 462646,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452667338.312, "dur": 1.053, + "args": { + "External id": 462647,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452667351.670, "dur": 0.920, + "args": { + "External id": 462648,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452667364.735, "dur": 3.441, + "args": { + "External id": 462649,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452667380.833, "dur": 2.192, + "args": { + "External id": 462650,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452667396.511, "dur": 0.732, + "args": { + "External id": 462651,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452667513.232, "dur": 3687.469, + "args": { + "External id": 462652,"Record function id": 0, "Ev Idx": 5435 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2338707, "tid": 2379456, + "ts": 6267452667537.534, "dur": 1399.253, + "args": { + "External id": 462653,"Record function id": 0, "Ev Idx": 5436 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338707, "tid": 2379456, + "ts": 6267452667556.446, "dur": 464.754, + "args": { + "External id": 462654,"Record function id": 0, "Ev Idx": 5437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452667699.488, "dur": 5.997, + "args": { + "External id": 462655,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452667709.951, "dur": 0.781, + "args": { + "External id": 462656,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452667712.955, "dur": 3.671, + "args": { + "External id": 462657,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452667718.899, "dur": 0.875, + "args": { + "External id": 462658,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452667721.375, "dur": 0.943, + "args": { + "External id": 462659,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452667726.608, "dur": 0.820, + "args": { + "External id": 462660,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452667729.094, "dur": 2.111, + "args": { + "External id": 462661,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452667732.735, "dur": 0.920, + "args": { + "External id": 462662,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452667735.630, "dur": 0.767, + "args": { + "External id": 462663,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452667740.245, "dur": 1.060, + "args": { + "External id": 462664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452667766.435, "dur": 215.285, + "args": { + "External id": 462665,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452667795.345, "dur": 180.084, + "args": { + "External id": 462666,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452667828.721, "dur": 20.053, + "args": { + "External id": 462667,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452667852.842, "dur": 84.272, + "args": { + "External id": 462668,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452667855.964, "dur": 80.683, + "args": { + "External id": 462669,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452667860.717, "dur": 8.788, + "args": { + "External id": 462670,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452667871.245, "dur": 64.534, + "args": { + "External id": 462671,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2338707, "tid": 2379456, + "ts": 6267452668188.859, "dur": 737.858, + "args": { + "External id": 462672,"Record function id": 0, "Ev Idx": 5455 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338707, "tid": 2379456, + "ts": 6267452668211.526, "dur": 700.353, + "args": { + "External id": 462673,"Record function id": 0, "Ev Idx": 5456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452668293.311, "dur": 7.255, + "args": { + "External id": 462674,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452668319.342, "dur": 36.373, + "args": { + "External id": 462675,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668325.799, "dur": 2.420, + "args": { + "External id": 462676,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668330.547, "dur": 2.224, + "args": { + "External id": 462677,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668333.970, "dur": 0.453, + "args": { + "External id": 462678,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668335.818, "dur": 0.457, + "args": { + "External id": 462679,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668339.528, "dur": 0.384, + "args": { + "External id": 462680,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668341.733, "dur": 2.757, + "args": { + "External id": 462681,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668345.748, "dur": 0.336, + "args": { + "External id": 462682,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668348.526, "dur": 0.441, + "args": { + "External id": 462683,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668350.049, "dur": 0.466, + "args": { + "External id": 462684,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452668369.287, "dur": 59.239, + "args": { + "External id": 462685,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267452668467.404, "dur": 145.166, + "args": { + "External id": 462686,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452668480.135, "dur": 4.091, + "args": { + "External id": 462687,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267452668490.320, "dur": 11.792, + "args": { + "External id": 462688,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267452668495.379, "dur": 6.227, + "args": { + "External id": 462689,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668499.550, "dur": 0.735, + "args": { + "External id": 462690,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267452668509.667, "dur": 27.511, + "args": { + "External id": 462691,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668512.366, "dur": 0.505, + "args": { + "External id": 462692,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668516.534, "dur": 0.316, + "args": { + "External id": 462693,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668518.068, "dur": 3.019, + "args": { + "External id": 462694,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668522.479, "dur": 1.876, + "args": { + "External id": 462695,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668525.607, "dur": 0.532, + "args": { + "External id": 462696,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668527.325, "dur": 0.317, + "args": { + "External id": 462697,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668530.231, "dur": 0.355, + "args": { + "External id": 462698,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668531.636, "dur": 0.401, + "args": { + "External id": 462699,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452668533.044, "dur": 0.435, + "args": { + "External id": 462700,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452668548.941, "dur": 36.341, + "args": { + "External id": 462701,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452668673.441, "dur": 151.250, + "args": { + "External id": 462702,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452668712.646, "dur": 107.607, + "args": { + "External id": 462703,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5486, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452668723.628, "dur": 91.184, + "args": { + "External id": 462704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452668847.235, "dur": 2.264, + "args": { + "External id": 462705,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5488, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452668946.306, "dur": 2227.468, + "args": { + "External id": 462706,"Sequence number": 5036370, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5489 + } + }, + { + "ph": "f", "id": 220, "pid": 2338707, "tid": 2379456, "ts": 6267452668946.306, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452669135.902, "dur": 139.915, + "args": { + "External id": 462707,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452669332.969, "dur": 51.155, + "args": { + "External id": 462708,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267452669404.721, "dur": 60.196, + "args": { + "External id": 462709,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452669479.333, "dur": 38.268, + "args": { + "External id": 462710,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452669525.211, "dur": 38.771, + "args": { + "External id": 462711,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452669571.624, "dur": 50.128, + "args": { + "External id": 462712,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452669635.459, "dur": 37.822, + "args": { + "External id": 462713,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452669707.099, "dur": 28.881, + "args": { + "External id": 462714,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452669757.106, "dur": 33.691, + "args": { + "External id": 462715,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452669820.585, "dur": 22.969, + "args": { + "External id": 462716,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452669859.588, "dur": 17.930, + "args": { + "External id": 462717,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452669886.570, "dur": 44.420, + "args": { + "External id": 462718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452669934.877, "dur": 39.034, + "args": { + "External id": 462719,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267452670032.470, "dur": 383.020, + "args": { + "External id": 462720,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452670193.277, "dur": 10.288, + "args": { + "External id": 462721,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452670206.772, "dur": 3.432, + "args": { + "External id": 462722,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452670211.481, "dur": 2.208, + "args": { + "External id": 462723,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452670214.826, "dur": 2.659, + "args": { + "External id": 462724,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452670282.065, "dur": 11.592, + "args": { + "External id": 462725,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452670284.516, "dur": 8.830, + "args": { + "External id": 462726,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452670295.666, "dur": 40.271, + "args": { + "External id": 462727,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452670302.519, "dur": 3.851, + "args": { + "External id": 462728,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452670340.023, "dur": 1.830, + "args": { + "External id": 462729,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452670340.989, "dur": 0.748, + "args": { + "External id": 462730,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452670342.986, "dur": 21.053, + "args": { + "External id": 462731,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452670349.826, "dur": 0.663, + "args": { + "External id": 462732,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452670464.113, "dur": 36.671, + "args": { + "External id": 462733,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452670522.807, "dur": 19.834, + "args": { + "External id": 462734,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452670551.962, "dur": 80.933, + "args": { + "External id": 462735,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452670644.439, "dur": 55.368, + "args": { + "External id": 462736,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452670713.525, "dur": 26.133, + "args": { + "External id": 462737,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452670749.130, "dur": 35.505, + "args": { + "External id": 462738,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452670794.167, "dur": 31.727, + "args": { + "External id": 462739,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452670833.447, "dur": 34.629, + "args": { + "External id": 462740,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267452670895.527, "dur": 29.257, + "args": { + "External id": 462741,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452670945.668, "dur": 27.298, + "args": { + "External id": 462742,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452670993.958, "dur": 19.883, + "args": { + "External id": 462743,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452671032.211, "dur": 16.190, + "args": { + "External id": 462744,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267452671111.870, "dur": 23.088, + "args": { + "External id": 462745,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671228.187, "dur": 20.767, + "args": { + "External id": 462746,"Record function id": 0, "Ev Idx": 5529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671232.868, "dur": 14.929, + "args": { + "External id": 462747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671237.775, "dur": 8.621, + "args": { + "External id": 462748,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671239.651, "dur": 6.561, + "args": { + "External id": 462749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671253.842, "dur": 5.566, + "args": { + "External id": 462750,"Record function id": 0, "Ev Idx": 5533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671255.534, "dur": 3.219, + "args": { + "External id": 462751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671256.545, "dur": 1.654, + "args": { + "External id": 462752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671257.123, "dur": 0.959, + "args": { + "External id": 462753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671263.759, "dur": 7.508, + "args": { + "External id": 462754,"Record function id": 0, "Ev Idx": 5537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671265.342, "dur": 5.374, + "args": { + "External id": 462755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671266.234, "dur": 3.994, + "args": { + "External id": 462756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671267.056, "dur": 3.056, + "args": { + "External id": 462757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671275.534, "dur": 4.604, + "args": { + "External id": 462758,"Record function id": 0, "Ev Idx": 5541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671276.843, "dur": 2.754, + "args": { + "External id": 462759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671277.435, "dur": 1.521, + "args": { + "External id": 462760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671277.824, "dur": 1.058, + "args": { + "External id": 462761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671284.153, "dur": 4.209, + "args": { + "External id": 462762,"Record function id": 0, "Ev Idx": 5545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671285.704, "dur": 2.149, + "args": { + "External id": 462763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671286.235, "dur": 1.106, + "args": { + "External id": 462764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671286.633, "dur": 0.626, + "args": { + "External id": 462765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671292.348, "dur": 4.958, + "args": { + "External id": 462766,"Record function id": 0, "Ev Idx": 5549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671293.722, "dur": 3.078, + "args": { + "External id": 462767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671294.682, "dur": 1.580, + "args": { + "External id": 462768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671295.330, "dur": 0.856, + "args": { + "External id": 462769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671301.315, "dur": 4.047, + "args": { + "External id": 462770,"Record function id": 0, "Ev Idx": 5553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671302.693, "dur": 2.172, + "args": { + "External id": 462771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671303.301, "dur": 1.061, + "args": { + "External id": 462772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671303.688, "dur": 0.592, + "args": { + "External id": 462773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671309.194, "dur": 3.985, + "args": { + "External id": 462774,"Record function id": 0, "Ev Idx": 5557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671310.384, "dur": 2.263, + "args": { + "External id": 462775,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671310.980, "dur": 1.186, + "args": { + "External id": 462776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671311.330, "dur": 0.753, + "args": { + "External id": 462777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671317.341, "dur": 6.434, + "args": { + "External id": 462778,"Record function id": 0, "Ev Idx": 5561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452671318.737, "dur": 4.517, + "args": { + "External id": 462779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671319.314, "dur": 3.460, + "args": { + "External id": 462780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452671321.916, "dur": 0.719, + "args": { + "External id": 462781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452671329.087, "dur": 61692.470, + "args": { + "External id": 462782,"Record function id": 0, "Sequence number": 5036369, "Fwd thread id": 1, "Ev Idx": 5565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452671330.987, "dur": 61679.566, + "args": { + "External id": 462783,"Sequence number": 5036369, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5566 + } + }, + { + "ph": "f", "id": 221, "pid": 2338707, "tid": 2379456, "ts": 6267452671330.987, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2338707, "tid": 2379456, + "ts": 6267452671367.589, "dur": 46.829, + "args": { + "External id": 462784,"Record function id": 0, "Ev Idx": 5567 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2338707, "tid": 2379456, + "ts": 6267452671423.790, "dur": 82.703, + "args": { + "External id": 462785,"Record function id": 0, "Ev Idx": 5568 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2338707, "tid": 2379456, + "ts": 6267452671513.419, "dur": 61484.278, + "args": { + "External id": 462786,"Record function id": 0, "Ev Idx": 5569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452671644.811, "dur": 10.303, + "args": { + "External id": 462787,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452671668.884, "dur": 8.848, + "args": { + "External id": 462788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452671696.247, "dur": 60236.650, + "args": { + "External id": 462789,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452671716.463, "dur": 60200.073, + "args": { + "External id": 462790,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452671844.748, "dur": 22.395, + "args": { + "External id": 462791,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452671894.210, "dur": 59970.193, + "args": { + "External id": 462792,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452671897.581, "dur": 59965.868, + "args": { + "External id": 462793,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452671903.107, "dur": 13.813, + "args": { + "External id": 462794,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452671919.527, "dur": 59940.541, + "args": { + "External id": 462795,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452732109.270, "dur": 14.565, + "args": { + "External id": 462796,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452732113.210, "dur": 9.794, + "args": { + "External id": 462797,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452732160.227, "dur": 466.328, + "args": { + "External id": 462798,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452732198.222, "dur": 420.810, + "args": { + "External id": 462799,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5582, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452732214.661, "dur": 375.456, + "args": { + "External id": 462800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452732657.053, "dur": 3.086, + "args": { + "External id": 462801,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5584, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452732744.944, "dur": 7.780, + "args": { + "External id": 462802,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452732808.502, "dur": 2.210, + "args": { + "External id": 462803,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452732829.841, "dur": 3.903, + "args": { + "External id": 462804,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452732848.345, "dur": 1.206, + "args": { + "External id": 462805,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452732864.648, "dur": 1.145, + "args": { + "External id": 462806,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452732878.464, "dur": 0.807, + "args": { + "External id": 462807,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452732891.064, "dur": 4.052, + "args": { + "External id": 462808,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452732907.127, "dur": 2.371, + "args": { + "External id": 462809,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452732921.401, "dur": 0.917, + "args": { + "External id": 462810,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452733040.005, "dur": 2961.933, + "args": { + "External id": 462811,"Record function id": 0, "Ev Idx": 5594 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2338707, "tid": 2379456, + "ts": 6267452733108.373, "dur": 583.336, + "args": { + "External id": 462812,"Record function id": 0, "Ev Idx": 5595 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338707, "tid": 2379456, + "ts": 6267452733127.372, "dur": 416.500, + "args": { + "External id": 462813,"Record function id": 0, "Ev Idx": 5596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452733241.788, "dur": 6.237, + "args": { + "External id": 462814,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452733251.925, "dur": 1.127, + "args": { + "External id": 462815,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452733255.392, "dur": 3.593, + "args": { + "External id": 462816,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452733261.482, "dur": 0.695, + "args": { + "External id": 462817,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452733264.014, "dur": 0.866, + "args": { + "External id": 462818,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452733268.735, "dur": 0.979, + "args": { + "External id": 462819,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452733271.429, "dur": 1.900, + "args": { + "External id": 462820,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452733274.852, "dur": 1.322, + "args": { + "External id": 462821,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452733278.145, "dur": 1.298, + "args": { + "External id": 462822,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452733283.710, "dur": 0.809, + "args": { + "External id": 462823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452733305.976, "dur": 197.918, + "args": { + "External id": 462824,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452733324.836, "dur": 172.674, + "args": { + "External id": 462825,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452733358.819, "dur": 19.513, + "args": { + "External id": 462826,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452733382.585, "dur": 81.307, + "args": { + "External id": 462827,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452733385.748, "dur": 77.703, + "args": { + "External id": 462828,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452733390.765, "dur": 8.911, + "args": { + "External id": 462829,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452733401.735, "dur": 61.076, + "args": { + "External id": 462830,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452733703.211, "dur": 2269.667, + "args": { + "External id": 462831,"Sequence number": 5036368, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5614 + } + }, + { + "ph": "f", "id": 222, "pid": 2338707, "tid": 2379456, "ts": 6267452733703.211, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452733837.592, "dur": 132.212, + "args": { + "External id": 462832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452734021.921, "dur": 99.961, + "args": { + "External id": 462833,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267452734151.997, "dur": 70.138, + "args": { + "External id": 462834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452734234.725, "dur": 41.664, + "args": { + "External id": 462835,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452734286.215, "dur": 42.102, + "args": { + "External id": 462836,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452734336.465, "dur": 32.075, + "args": { + "External id": 462837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452734377.931, "dur": 34.855, + "args": { + "External id": 462838,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452734452.582, "dur": 29.885, + "args": { + "External id": 462839,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452734508.869, "dur": 30.525, + "args": { + "External id": 462840,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452734567.852, "dur": 21.690, + "args": { + "External id": 462841,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452734626.917, "dur": 20.237, + "args": { + "External id": 462842,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452734659.514, "dur": 48.882, + "args": { + "External id": 462843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452734713.203, "dur": 39.051, + "args": { + "External id": 462844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267452734788.394, "dur": 407.462, + "args": { + "External id": 462845,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452734882.850, "dur": 7.679, + "args": { + "External id": 462846,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452734893.195, "dur": 4.188, + "args": { + "External id": 462847,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452734899.043, "dur": 4.471, + "args": { + "External id": 462848,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452734949.070, "dur": 10.237, + "args": { + "External id": 462849,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452735015.357, "dur": 6.888, + "args": { + "External id": 462850,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452735018.597, "dur": 3.377, + "args": { + "External id": 462851,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452735024.144, "dur": 80.358, + "args": { + "External id": 462852,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452735030.790, "dur": 1.927, + "args": { + "External id": 462853,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267452735110.825, "dur": 2.359, + "args": { + "External id": 462854,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452735111.756, "dur": 1.309, + "args": { + "External id": 462855,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267452735114.405, "dur": 22.772, + "args": { + "External id": 462856,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452735118.431, "dur": 0.713, + "args": { + "External id": 462857,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452735246.230, "dur": 36.186, + "args": { + "External id": 462858,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452735303.401, "dur": 20.398, + "args": { + "External id": 462859,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452735336.785, "dur": 63.046, + "args": { + "External id": 462860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452735407.970, "dur": 48.369, + "args": { + "External id": 462861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452735468.271, "dur": 25.683, + "args": { + "External id": 462862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452735500.731, "dur": 36.060, + "args": { + "External id": 462863,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452735544.659, "dur": 69.888, + "args": { + "External id": 462864,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267452735643.678, "dur": 57.341, + "args": { + "External id": 462865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267452735735.778, "dur": 36.224, + "args": { + "External id": 462866,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452735796.361, "dur": 27.121, + "args": { + "External id": 462867,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452735840.555, "dur": 22.476, + "args": { + "External id": 462868,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267452735883.481, "dur": 17.810, + "args": { + "External id": 462869,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267452735915.724, "dur": 19.671, + "args": { + "External id": 462870,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736029.198, "dur": 21.222, + "args": { + "External id": 462871,"Record function id": 0, "Ev Idx": 5654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736033.460, "dur": 15.739, + "args": { + "External id": 462872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736038.167, "dur": 9.810, + "args": { + "External id": 462873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736039.707, "dur": 8.157, + "args": { + "External id": 462874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736055.155, "dur": 54.544, + "args": { + "External id": 462875,"Record function id": 0, "Ev Idx": 5658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736095.631, "dur": 12.973, + "args": { + "External id": 462876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736097.695, "dur": 9.865, + "args": { + "External id": 462877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736098.540, "dur": 8.673, + "args": { + "External id": 462878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736115.860, "dur": 6.638, + "args": { + "External id": 462879,"Record function id": 0, "Ev Idx": 5662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736118.613, "dur": 3.303, + "args": { + "External id": 462880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736119.329, "dur": 2.090, + "args": { + "External id": 462881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736120.053, "dur": 1.287, + "args": { + "External id": 462882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736126.551, "dur": 5.260, + "args": { + "External id": 462883,"Record function id": 0, "Ev Idx": 5666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736128.387, "dur": 2.912, + "args": { + "External id": 462884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736129.242, "dur": 1.544, + "args": { + "External id": 462885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736129.833, "dur": 0.867, + "args": { + "External id": 462886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736135.735, "dur": 5.469, + "args": { + "External id": 462887,"Record function id": 0, "Ev Idx": 5670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736137.986, "dur": 2.720, + "args": { + "External id": 462888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736138.522, "dur": 1.571, + "args": { + "External id": 462889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736138.946, "dur": 1.065, + "args": { + "External id": 462890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736145.014, "dur": 6.819, + "args": { + "External id": 462891,"Record function id": 0, "Ev Idx": 5674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736146.596, "dur": 4.723, + "args": { + "External id": 462892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736147.340, "dur": 3.414, + "args": { + "External id": 462893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736149.935, "dur": 0.739, + "args": { + "External id": 462894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736156.087, "dur": 5.274, + "args": { + "External id": 462895,"Record function id": 0, "Ev Idx": 5678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736157.925, "dur": 2.935, + "args": { + "External id": 462896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736158.816, "dur": 1.570, + "args": { + "External id": 462897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736159.311, "dur": 0.992, + "args": { + "External id": 462898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736165.601, "dur": 4.783, + "args": { + "External id": 462899,"Record function id": 0, "Ev Idx": 5682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736167.418, "dur": 2.469, + "args": { + "External id": 462900,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736168.340, "dur": 1.069, + "args": { + "External id": 462901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736168.656, "dur": 0.674, + "args": { + "External id": 462902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736174.844, "dur": 6.350, + "args": { + "External id": 462903,"Record function id": 0, "Ev Idx": 5686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452736176.405, "dur": 4.252, + "args": { + "External id": 462904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736176.975, "dur": 3.196, + "args": { + "External id": 462905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452736177.398, "dur": 2.680, + "args": { + "External id": 462906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452736186.960, "dur": 61844.972, + "args": { + "External id": 462907,"Record function id": 0, "Sequence number": 5036367, "Fwd thread id": 1, "Ev Idx": 5690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452736188.752, "dur": 61831.484, + "args": { + "External id": 462908,"Sequence number": 5036367, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5691 + } + }, + { + "ph": "f", "id": 223, "pid": 2338707, "tid": 2379456, "ts": 6267452736188.752, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2338707, "tid": 2379456, + "ts": 6267452736228.446, "dur": 46.892, + "args": { + "External id": 462909,"Record function id": 0, "Ev Idx": 5692 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2338707, "tid": 2379456, + "ts": 6267452736285.066, "dur": 93.225, + "args": { + "External id": 462910,"Record function id": 0, "Ev Idx": 5693 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2338707, "tid": 2379456, + "ts": 6267452736385.168, "dur": 61625.763, + "args": { + "External id": 462911,"Record function id": 0, "Ev Idx": 5694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452736496.909, "dur": 9.344, + "args": { + "External id": 462912,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452736518.189, "dur": 5.256, + "args": { + "External id": 462913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452736540.180, "dur": 60394.424, + "args": { + "External id": 462914,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452736556.401, "dur": 60362.000, + "args": { + "External id": 462915,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452736695.108, "dur": 22.873, + "args": { + "External id": 462916,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452736744.118, "dur": 60120.335, + "args": { + "External id": 462917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452736752.654, "dur": 60110.602, + "args": { + "External id": 462918,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452736757.873, "dur": 12.858, + "args": { + "External id": 462919,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452736773.197, "dur": 60084.466, + "args": { + "External id": 462920,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452797106.981, "dur": 14.860, + "args": { + "External id": 462921,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452797111.379, "dur": 9.818, + "args": { + "External id": 462922,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452797161.521, "dur": 463.081, + "args": { + "External id": 462923,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452797199.581, "dur": 417.996, + "args": { + "External id": 462924,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5707, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452797215.408, "dur": 375.533, + "args": { + "External id": 462925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452797657.807, "dur": 3.675, + "args": { + "External id": 462926,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5709, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452797742.989, "dur": 8.151, + "args": { + "External id": 462927,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452797807.631, "dur": 4.116, + "args": { + "External id": 462928,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452797834.129, "dur": 1.692, + "args": { + "External id": 462929,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452797850.923, "dur": 1.028, + "args": { + "External id": 462930,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452797865.431, "dur": 0.994, + "args": { + "External id": 462931,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452797882.128, "dur": 3.347, + "args": { + "External id": 462932,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452797898.550, "dur": 0.892, + "args": { + "External id": 462933,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452797913.897, "dur": 2.710, + "args": { + "External id": 462934,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452797928.130, "dur": 0.986, + "args": { + "External id": 462935,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452798050.962, "dur": 399.960, + "args": { + "External id": 462936,"Record function id": 0, "Sequence number": 5036366, "Fwd thread id": 1, "Ev Idx": 5719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267452798094.684, "dur": 344.417, + "args": { + "External id": 462937,"Sequence number": 5036366, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5720 + } + }, + { + "ph": "f", "id": 224, "pid": 2338707, "tid": 2379456, "ts": 6267452798094.684, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2338707, "tid": 2379456, + "ts": 6267452798249.084, "dur": 65.720, + "args": { + "External id": 462938,"kernel_hash": "cylhmuu3q4lld7ngapzsx2q3nsghgiylz3hysdrztph5rn33kr3r", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/yl/cylhmuu3q4lld7ngapzsx2q3nsghgiylz3hysdrztph5rn33kr3r.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 5721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2338707, "tid": 2379456, + "ts": 6267452798334.155, "dur": 33.904, + "args": { + "External id": 462939,"kernel_hash": "cqovkpvfazf4uawh526mshntxf6zr2towc3gbwazqnfjbhqiu3u5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqovkpvfazf4uawh526mshntxf6zr2towc3gbwazqnfjbhqiu3u5.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096, 4096], [32000, 4096], []], "Ev Idx": 5722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2338707, "tid": 2379456, + "ts": 6267452798391.587, "dur": 27.883, + "args": { + "External id": 462940,"kernel_hash": "cxsh2jiwiopwz5eatuiwkxhwp4pcrm5kca3w7qyj3ojcu3sw473n", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/xs/cxsh2jiwiopwz5eatuiwkxhwp4pcrm5kca3w7qyj3ojcu3sw473n.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452798465.616, "dur": 17.407, + "args": { + "External id": 462941,"Record function id": 0, "Ev Idx": 5724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267452798469.903, "dur": 11.934, + "args": { + "External id": 462942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452798473.944, "dur": 6.899, + "args": { + "External id": 462943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267452798475.668, "dur": 5.046, + "args": { + "External id": 462944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2338707, "tid": 2379456, + "ts": 6267452798511.565, "dur": 14158.132, + "args": { + "External id": 462945,"Record function id": 0, "Ev Idx": 5728 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2338707, "tid": 2379456, + "ts": 6267452798535.218, "dur": 46.005, + "args": { + "External id": 462946,"Record function id": 0, "Ev Idx": 5729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2338707, "tid": 2379456, + "ts": 6267452798588.632, "dur": 337.047, + "args": { + "External id": 462947,"Record function id": 0, "Ev Idx": 5730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2338707, "tid": 2379456, + "ts": 6267452798935.419, "dur": 13413.245, + "args": { + "External id": 462948,"Record function id": 0, "Ev Idx": 5731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452799150.787, "dur": 12.015, + "args": { + "External id": 462949,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267452799178.369, "dur": 6.191, + "args": { + "External id": 462950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 5733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452799212.681, "dur": 11499.005, + "args": { + "External id": 462951,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 5734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267452799235.152, "dur": 11458.481, + "args": { + "External id": 462952,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 5735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452800047.132, "dur": 72.329, + "args": { + "External id": 462953,"Record function id": 0, "Concrete Inputs": ["[277237]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267452800434.694, "dur": 10194.231, + "args": { + "External id": 462954,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], [], []], "Ev Idx": 5737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267452800439.297, "dur": 10187.911, + "args": { + "External id": 462955,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], []], "Ev Idx": 5738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452800448.682, "dur": 19.286, + "args": { + "External id": 462956,"Record function id": 0, "Concrete Inputs": ["[277237]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267452800471.064, "dur": 10147.852, + "args": { + "External id": 462957,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[277237], [277237], []], "Ev Idx": 5740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452810885.027, "dur": 14.087, + "args": { + "External id": 462958,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1134596096], [], [], [], [], []], "Ev Idx": 5741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267452810889.190, "dur": 9.370, + "args": { + "External id": 462959,"Record function id": 0, "Concrete Inputs": ["[141824512]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267452810936.372, "dur": 534.431, + "args": { + "External id": 462960,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[141824512], [1134596096], [], [], [], []], "Ev Idx": 5743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452810972.021, "dur": 491.231, + "args": { + "External id": 462961,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 141824512, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1134596096], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5744, "In msg nelems": 1134596096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267452810987.407, "dur": 468.272, + "args": { + "External id": 462962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1134596096]], "Ev Idx": 5745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267452811501.691, "dur": 3.084, + "args": { + "External id": 462963,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5746, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811582.335, "dur": 26.785, + "args": { + "External id": 462964,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811664.671, "dur": 1.954, + "args": { + "External id": 462965,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811684.600, "dur": 1.592, + "args": { + "External id": 462966,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "16384512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811701.863, "dur": 1.055, + "args": { + "External id": 462967,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "18481664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811719.521, "dur": 3.588, + "args": { + "External id": 462968,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "19005952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811735.874, "dur": 0.763, + "args": { + "External id": 462969,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "19530240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811748.723, "dur": 1.794, + "args": { + "External id": 462970,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811762.673, "dur": 0.793, + "args": { + "External id": 462971,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "21627904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811775.494, "dur": 3.086, + "args": { + "External id": 462972,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "28967936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811793.356, "dur": 0.699, + "args": { + "External id": 462973,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "36307968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811806.997, "dur": 0.937, + "args": { + "External id": 462974,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811818.977, "dur": 1.319, + "args": { + "External id": 462975,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "43648512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811834.854, "dur": 3.351, + "args": { + "External id": 462976,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "45745664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811852.414, "dur": 1.281, + "args": { + "External id": 462977,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "46269952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811869.365, "dur": 1.595, + "args": { + "External id": 462978,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "46794240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811884.537, "dur": 0.963, + "args": { + "External id": 462979,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811897.319, "dur": 3.157, + "args": { + "External id": 462980,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "48891904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811912.626, "dur": 0.911, + "args": { + "External id": 462981,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "56231936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811926.610, "dur": 0.906, + "args": { + "External id": 462982,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "63571968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811940.257, "dur": 1.237, + "args": { + "External id": 462983,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811956.043, "dur": 4.231, + "args": { + "External id": 462984,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "70912512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811972.532, "dur": 0.938, + "args": { + "External id": 462985,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73009664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452811985.814, "dur": 1.090, + "args": { + "External id": 462986,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73533952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812010.028, "dur": 1.284, + "args": { + "External id": 462987,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "74058240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812024.832, "dur": 3.674, + "args": { + "External id": 462988,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812042.761, "dur": 1.043, + "args": { + "External id": 462989,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "76155904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812095.855, "dur": 1.891, + "args": { + "External id": 462990,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "83495936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812112.989, "dur": 1.272, + "args": { + "External id": 462991,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "90835968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812126.673, "dur": 4.809, + "args": { + "External id": 462992,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812143.278, "dur": 1.323, + "args": { + "External id": 462993,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "98176512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812158.622, "dur": 1.429, + "args": { + "External id": 462994,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100273664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812171.588, "dur": 1.404, + "args": { + "External id": 462995,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100797952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812184.510, "dur": 4.216, + "args": { + "External id": 462996,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "101322240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812199.417, "dur": 0.963, + "args": { + "External id": 462997,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812211.206, "dur": 2.387, + "args": { + "External id": 462998,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "103419904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812228.335, "dur": 1.021, + "args": { + "External id": 462999,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "110759936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812241.017, "dur": 3.071, + "args": { + "External id": 463000,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "118099968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812255.359, "dur": 1.225, + "args": { + "External id": 463001,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267452812267.432, "dur": 1.053, + "args": { + "External id": 463002,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "125440512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267453980201.396, "dur": 123.019, + "args": { + "External id": 463003,"Record function id": 0, "Sequence number": 5036821, "Fwd thread id": 1, "Ev Idx": 5786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267453980211.785, "dur": 102.304, + "args": { + "External id": 463004,"Sequence number": 5036821, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5787 + } + }, + { + "ph": "f", "id": 225, "pid": 2338707, "tid": 2379456, "ts": 6267453980211.785, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338707, "tid": 2379456, + "ts": 6267453980220.209, "dur": 91.910, + "args": { + "External id": 463005,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267453980334.001, "dur": 284.064, + "args": { + "External id": 463006,"Record function id": 0, "Ev Idx": 5789 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267453980417.108, "dur": 107.053, + "args": { + "External id": 463007,"Record function id": 0, "Ev Idx": 5790 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2338707, "tid": 2379456, + "ts": 6267453980454.563, "dur": 54.900, + "args": { + "External id": 463008,"Record function id": 0, "Ev Idx": 5791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267453980530.557, "dur": 2.335, + "args": { + "External id": 463009,"Sequence number": 5036820, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5792 + } + }, + { + "ph": "f", "id": 226, "pid": 2338707, "tid": 2379456, "ts": 6267453980530.557, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267453980537.472, "dur": 74.142, + "args": { + "External id": 463010,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267453980543.296, "dur": 67.156, + "args": { + "External id": 463011,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267453980555.167, "dur": 3.264, + "args": { + "External id": 463012,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267453980631.619, "dur": 37921.595, + "args": { + "External id": 463013,"Record function id": 0, "Sequence number": 5036818, "Fwd thread id": 1, "Ev Idx": 5796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267453980633.994, "dur": 37900.278, + "args": { + "External id": 463014,"Sequence number": 5036818, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5797 + } + }, + { + "ph": "f", "id": 227, "pid": 2338707, "tid": 2379456, "ts": 6267453980633.994, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267453980682.515, "dur": 5.239, + "args": { + "External id": 463015,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267453980691.352, "dur": 37436.983, + "args": { + "External id": 463016,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267453980693.715, "dur": 37433.153, + "args": { + "External id": 463017,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267453980697.570, "dur": 10.084, + "args": { + "External id": 463018,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267453980709.906, "dur": 37407.863, + "args": { + "External id": 463019,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338707, "tid": 2379456, + "ts": 6267454018148.222, "dur": 1.317, + "args": { + "External id": 463020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267454018152.998, "dur": 7.665, + "args": { + "External id": 463021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267454018157.559, "dur": 2.688, + "args": { + "External id": 463022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2379456, + "ts": 6267454018175.292, "dur": 67.895, + "args": { + "External id": 463023,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338707, "tid": 2379456, + "ts": 6267454018255.580, "dur": 60.503, + "args": { + "External id": 463024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2379456, + "ts": 6267454018257.550, "dur": 58.242, + "args": { + "External id": 463025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2379456, + "ts": 6267454018259.934, "dur": 55.316, + "args": { + "External id": 463026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454018579.727, "dur": 48.165, + "args": { + "External id": 463027,"Record function id": 0, "Sequence number": 5036817, "Fwd thread id": 1, "Ev Idx": 5810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454018582.880, "dur": 40.165, + "args": { + "External id": 463028,"Sequence number": 5036817, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5811 + } + }, + { + "ph": "f", "id": 228, "pid": 2338707, "tid": 2379456, "ts": 6267454018582.880, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454018608.397, "dur": 14.242, + "args": { + "External id": 463029,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454018613.947, "dur": 8.305, + "args": { + "External id": 463030,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454018635.511, "dur": 130.501, + "args": { + "External id": 463031,"Record function id": 0, "Sequence number": 5036816, "Fwd thread id": 1, "Ev Idx": 5814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454018636.831, "dur": 120.076, + "args": { + "External id": 463032,"Sequence number": 5036816, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5815 + } + }, + { + "ph": "f", "id": 229, "pid": 2338707, "tid": 2379456, "ts": 6267454018636.831, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454018641.212, "dur": 114.998, + "args": { + "External id": 463033,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454018647.614, "dur": 47.643, + "args": { + "External id": 463034,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454018651.381, "dur": 9.188, + "args": { + "External id": 463035,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454018662.563, "dur": 32.302, + "args": { + "External id": 463036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454018666.950, "dur": 27.215, + "args": { + "External id": 463037,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454018698.134, "dur": 7.494, + "args": { + "External id": 463038,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454018702.828, "dur": 2.149, + "args": { + "External id": 463039,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454018709.228, "dur": 45.880, + "args": { + "External id": 463040,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454018772.048, "dur": 67.829, + "args": { + "External id": 463041,"Record function id": 0, "Sequence number": 5036815, "Fwd thread id": 1, "Ev Idx": 5824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454018773.307, "dur": 62.630, + "args": { + "External id": 463042,"Sequence number": 5036815, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5825 + } + }, + { + "ph": "f", "id": 230, "pid": 2338707, "tid": 2379456, "ts": 6267454018773.307, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454018776.921, "dur": 58.669, + "args": { + "External id": 463043,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454018780.217, "dur": 21.549, + "args": { + "External id": 463044,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454018782.189, "dur": 3.323, + "args": { + "External id": 463045,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454018786.323, "dur": 15.154, + "args": { + "External id": 463046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454018788.947, "dur": 12.094, + "args": { + "External id": 463047,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267454018803.344, "dur": 10.497, + "args": { + "External id": 463048,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454018811.688, "dur": 1.354, + "args": { + "External id": 463049,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454018814.870, "dur": 20.058, + "args": { + "External id": 463050,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454018844.983, "dur": 204.996, + "args": { + "External id": 463051,"Record function id": 0, "Sequence number": 5036814, "Fwd thread id": 1, "Ev Idx": 5834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454018846.099, "dur": 199.989, + "args": { + "External id": 463052,"Sequence number": 5036814, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5835 + } + }, + { + "ph": "f", "id": 231, "pid": 2338707, "tid": 2379456, "ts": 6267454018846.099, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454018848.437, "dur": 197.202, + "args": { + "External id": 463053,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454018850.468, "dur": 20.391, + "args": { + "External id": 463054,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454018853.177, "dur": 2.738, + "args": { + "External id": 463055,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454018856.751, "dur": 13.794, + "args": { + "External id": 463056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454018858.501, "dur": 11.534, + "args": { + "External id": 463057,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454018872.469, "dur": 3.615, + "args": { + "External id": 463058,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454018874.878, "dur": 0.987, + "args": { + "External id": 463059,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454018877.020, "dur": 167.326, + "args": { + "External id": 463060,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454019101.955, "dur": 115.672, + "args": { + "External id": 463061,"Record function id": 0, "Sequence number": 5036813, "Fwd thread id": 1, "Ev Idx": 5844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454019104.002, "dur": 107.920, + "args": { + "External id": 463062,"Sequence number": 5036813, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5845 + } + }, + { + "ph": "f", "id": 232, "pid": 2338707, "tid": 2379456, "ts": 6267454019104.002, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454019107.108, "dur": 104.235, + "args": { + "External id": 463063,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454019108.680, "dur": 23.995, + "args": { + "External id": 463064,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454019110.798, "dur": 4.020, + "args": { + "External id": 463065,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454019116.039, "dur": 16.340, + "args": { + "External id": 463066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454019117.889, "dur": 14.066, + "args": { + "External id": 463067,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454019134.412, "dur": 6.983, + "args": { + "External id": 463068,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454019137.066, "dur": 3.956, + "args": { + "External id": 463069,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454019150.292, "dur": 60.032, + "args": { + "External id": 463070,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454019226.685, "dur": 42.764, + "args": { + "External id": 463071,"Record function id": 0, "Sequence number": 5036812, "Fwd thread id": 1, "Ev Idx": 5854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454019228.177, "dur": 1.364, + "args": { + "External id": 463072,"Sequence number": 5036812, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5855 + } + }, + { + "ph": "f", "id": 233, "pid": 2338707, "tid": 2379456, "ts": 6267454019228.177, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454019232.802, "dur": 32.803, + "args": { + "External id": 463073,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454019235.795, "dur": 29.222, + "args": { + "External id": 463074,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454019243.372, "dur": 0.868, + "args": { + "External id": 463075,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454019276.049, "dur": 2370.892, + "args": { + "External id": 463076,"Record function id": 0, "Sequence number": 5036810, "Fwd thread id": 1, "Ev Idx": 5859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454019277.972, "dur": 2307.393, + "args": { + "External id": 463077,"Sequence number": 5036810, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5860 + } + }, + { + "ph": "f", "id": 234, "pid": 2338707, "tid": 2379456, "ts": 6267454019277.972, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454019329.263, "dur": 4.853, + "args": { + "External id": 463078,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454019337.471, "dur": 1987.940, + "args": { + "External id": 463079,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454019339.735, "dur": 1985.321, + "args": { + "External id": 463080,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454019343.768, "dur": 4.857, + "args": { + "External id": 463081,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454019349.688, "dur": 1974.058, + "args": { + "External id": 463082,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338707, "tid": 2379456, + "ts": 6267454021330.113, "dur": 0.480, + "args": { + "External id": 463083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267454021332.828, "dur": 5.738, + "args": { + "External id": 463084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267454021337.177, "dur": 1.221, + "args": { + "External id": 463085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2379456, + "ts": 6267454021344.411, "dur": 26.845, + "args": { + "External id": 463086,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338707, "tid": 2379456, + "ts": 6267454021378.364, "dur": 48.369, + "args": { + "External id": 463087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2379456, + "ts": 6267454021380.048, "dur": 46.397, + "args": { + "External id": 463088,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2379456, + "ts": 6267454021381.480, "dur": 44.649, + "args": { + "External id": 463089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454021615.478, "dur": 26.111, + "args": { + "External id": 463090,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454021661.977, "dur": 17.807, + "args": { + "External id": 463091,"Record function id": 0, "Sequence number": 5036809, "Fwd thread id": 1, "Ev Idx": 5874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454021663.796, "dur": 12.793, + "args": { + "External id": 463092,"Sequence number": 5036809, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5875 + } + }, + { + "ph": "f", "id": 235, "pid": 2338707, "tid": 2379456, "ts": 6267454021663.796, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454021668.736, "dur": 7.533, + "args": { + "External id": 463093,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454021670.701, "dur": 5.375, + "args": { + "External id": 463094,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454021684.222, "dur": 90.150, + "args": { + "External id": 463095,"Record function id": 0, "Sequence number": 5036808, "Fwd thread id": 1, "Ev Idx": 5878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454021685.301, "dur": 84.027, + "args": { + "External id": 463096,"Sequence number": 5036808, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5879 + } + }, + { + "ph": "f", "id": 236, "pid": 2338707, "tid": 2379456, "ts": 6267454021685.301, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454021688.087, "dur": 80.665, + "args": { + "External id": 463097,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454021694.447, "dur": 30.082, + "args": { + "External id": 463098,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454021697.001, "dur": 4.858, + "args": { + "External id": 463099,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454021703.156, "dur": 20.923, + "args": { + "External id": 463100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454021705.443, "dur": 18.160, + "args": { + "External id": 463101,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454021726.840, "dur": 5.510, + "args": { + "External id": 463102,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454021730.430, "dur": 1.511, + "args": { + "External id": 463103,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454021733.670, "dur": 34.173, + "args": { + "External id": 463104,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454021780.275, "dur": 62.805, + "args": { + "External id": 463105,"Record function id": 0, "Sequence number": 5036807, "Fwd thread id": 1, "Ev Idx": 5888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454021781.461, "dur": 58.494, + "args": { + "External id": 463106,"Sequence number": 5036807, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5889 + } + }, + { + "ph": "f", "id": 237, "pid": 2338707, "tid": 2379456, "ts": 6267454021781.461, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454021784.345, "dur": 55.230, + "args": { + "External id": 463107,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454021786.918, "dur": 21.426, + "args": { + "External id": 463108,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454021788.428, "dur": 2.640, + "args": { + "External id": 463109,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454021794.475, "dur": 13.521, + "args": { + "External id": 463110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454021795.620, "dur": 11.768, + "args": { + "External id": 463111,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267454021814.073, "dur": 6.898, + "args": { + "External id": 463112,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454021819.295, "dur": 1.029, + "args": { + "External id": 463113,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454021821.845, "dur": 16.875, + "args": { + "External id": 463114,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454021847.733, "dur": 144.819, + "args": { + "External id": 463115,"Record function id": 0, "Sequence number": 5036806, "Fwd thread id": 1, "Ev Idx": 5898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454021848.763, "dur": 139.438, + "args": { + "External id": 463116,"Sequence number": 5036806, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5899 + } + }, + { + "ph": "f", "id": 238, "pid": 2338707, "tid": 2379456, "ts": 6267454021848.763, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454021850.996, "dur": 136.723, + "args": { + "External id": 463117,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454021852.885, "dur": 19.670, + "args": { + "External id": 463118,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454021854.357, "dur": 2.328, + "args": { + "External id": 463119,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454021859.767, "dur": 12.471, + "args": { + "External id": 463120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454021860.566, "dur": 11.317, + "args": { + "External id": 463121,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454021873.627, "dur": 5.622, + "args": { + "External id": 463122,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454021875.803, "dur": 3.236, + "args": { + "External id": 463123,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454021880.189, "dur": 106.516, + "args": { + "External id": 463124,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454021998.354, "dur": 184.866, + "args": { + "External id": 463125,"Record function id": 0, "Sequence number": 5036805, "Fwd thread id": 1, "Ev Idx": 5908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454021999.667, "dur": 155.991, + "args": { + "External id": 463126,"Sequence number": 5036805, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5909 + } + }, + { + "ph": "f", "id": 239, "pid": 2338707, "tid": 2379456, "ts": 6267454021999.667, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454022001.793, "dur": 153.291, + "args": { + "External id": 463127,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454022003.333, "dur": 22.471, + "args": { + "External id": 463128,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454022004.755, "dur": 2.529, + "args": { + "External id": 463129,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454022008.233, "dur": 17.256, + "args": { + "External id": 463130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454022011.493, "dur": 13.475, + "args": { + "External id": 463131,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454022026.913, "dur": 3.206, + "args": { + "External id": 463132,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454022029.242, "dur": 0.620, + "args": { + "External id": 463133,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454022031.069, "dur": 122.104, + "args": { + "External id": 463134,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454022163.335, "dur": 18.112, + "args": { + "External id": 463135,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454022191.554, "dur": 41.423, + "args": { + "External id": 463136,"Record function id": 0, "Sequence number": 5036804, "Fwd thread id": 1, "Ev Idx": 5919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454022193.038, "dur": 1.143, + "args": { + "External id": 463137,"Sequence number": 5036804, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5920 + } + }, + { + "ph": "f", "id": 240, "pid": 2338707, "tid": 2379456, "ts": 6267454022193.038, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454022197.645, "dur": 31.866, + "args": { + "External id": 463138,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454022200.584, "dur": 28.342, + "args": { + "External id": 463139,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454022207.564, "dur": 0.765, + "args": { + "External id": 463140,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454022239.499, "dur": 3377.924, + "args": { + "External id": 463141,"Record function id": 0, "Sequence number": 5036802, "Fwd thread id": 1, "Ev Idx": 5924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454022241.392, "dur": 3322.074, + "args": { + "External id": 463142,"Sequence number": 5036802, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5925 + } + }, + { + "ph": "f", "id": 241, "pid": 2338707, "tid": 2379456, "ts": 6267454022241.392, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454022283.274, "dur": 3.686, + "args": { + "External id": 463143,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454022290.174, "dur": 3030.106, + "args": { + "External id": 463144,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454022292.237, "dur": 3027.685, + "args": { + "External id": 463145,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454022295.629, "dur": 4.963, + "args": { + "External id": 463146,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454022304.400, "dur": 3014.337, + "args": { + "External id": 463147,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338707, "tid": 2379456, + "ts": 6267454025324.714, "dur": 0.445, + "args": { + "External id": 463148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025327.116, "dur": 3.058, + "args": { + "External id": 463149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025328.731, "dur": 1.309, + "args": { + "External id": 463150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2379456, + "ts": 6267454025335.612, "dur": 25.761, + "args": { + "External id": 463151,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338707, "tid": 2379456, + "ts": 6267454025367.960, "dur": 45.083, + "args": { + "External id": 463152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2379456, + "ts": 6267454025369.466, "dur": 43.304, + "args": { + "External id": 463153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2379456, + "ts": 6267454025371.116, "dur": 41.184, + "args": { + "External id": 463154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025576.961, "dur": 34.989, + "args": { + "External id": 463155,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454025631.019, "dur": 20.540, + "args": { + "External id": 463156,"Record function id": 0, "Sequence number": 5036801, "Fwd thread id": 1, "Ev Idx": 5939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454025636.352, "dur": 11.795, + "args": { + "External id": 463157,"Sequence number": 5036801, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5940 + } + }, + { + "ph": "f", "id": 242, "pid": 2338707, "tid": 2379456, "ts": 6267454025636.352, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454025640.254, "dur": 7.594, + "args": { + "External id": 463158,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454025642.148, "dur": 5.437, + "args": { + "External id": 463159,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454025656.008, "dur": 92.542, + "args": { + "External id": 463160,"Record function id": 0, "Sequence number": 5036800, "Fwd thread id": 1, "Ev Idx": 5943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454025657.128, "dur": 85.888, + "args": { + "External id": 463161,"Sequence number": 5036800, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5944 + } + }, + { + "ph": "f", "id": 243, "pid": 2338707, "tid": 2379456, "ts": 6267454025657.128, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454025659.780, "dur": 82.754, + "args": { + "External id": 463162,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454025663.459, "dur": 32.194, + "args": { + "External id": 463163,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454025666.014, "dur": 4.510, + "args": { + "External id": 463164,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025671.461, "dur": 23.809, + "args": { + "External id": 463165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025673.470, "dur": 21.259, + "args": { + "External id": 463166,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454025697.580, "dur": 8.154, + "args": { + "External id": 463167,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454025701.230, "dur": 4.098, + "args": { + "External id": 463168,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025709.493, "dur": 32.017, + "args": { + "External id": 463169,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454025754.217, "dur": 61.256, + "args": { + "External id": 463170,"Record function id": 0, "Sequence number": 5036799, "Fwd thread id": 1, "Ev Idx": 5953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454025755.399, "dur": 56.662, + "args": { + "External id": 463171,"Sequence number": 5036799, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5954 + } + }, + { + "ph": "f", "id": 244, "pid": 2338707, "tid": 2379456, "ts": 6267454025755.399, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454025757.819, "dur": 53.807, + "args": { + "External id": 463172,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454025760.290, "dur": 20.696, + "args": { + "External id": 463173,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454025761.424, "dur": 3.130, + "args": { + "External id": 463174,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025765.311, "dur": 15.357, + "args": { + "External id": 463175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025766.513, "dur": 13.757, + "args": { + "External id": 463176,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267454025782.549, "dur": 9.026, + "args": { + "External id": 463177,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454025789.656, "dur": 1.305, + "args": { + "External id": 463178,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025792.550, "dur": 18.312, + "args": { + "External id": 463179,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454025820.219, "dur": 134.020, + "args": { + "External id": 463180,"Record function id": 0, "Sequence number": 5036798, "Fwd thread id": 1, "Ev Idx": 5963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454025821.253, "dur": 129.066, + "args": { + "External id": 463181,"Sequence number": 5036798, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5964 + } + }, + { + "ph": "f", "id": 245, "pid": 2338707, "tid": 2379456, "ts": 6267454025821.253, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454025823.337, "dur": 126.447, + "args": { + "External id": 463182,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454025824.774, "dur": 18.969, + "args": { + "External id": 463183,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454025826.240, "dur": 2.348, + "args": { + "External id": 463184,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025829.225, "dur": 14.234, + "args": { + "External id": 463185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025830.241, "dur": 12.816, + "args": { + "External id": 463186,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454025844.735, "dur": 3.446, + "args": { + "External id": 463187,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454025847.122, "dur": 0.797, + "args": { + "External id": 463188,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025851.350, "dur": 97.391, + "args": { + "External id": 463189,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454025960.185, "dur": 177.022, + "args": { + "External id": 463190,"Record function id": 0, "Sequence number": 5036797, "Fwd thread id": 1, "Ev Idx": 5973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454025961.410, "dur": 145.206, + "args": { + "External id": 463191,"Sequence number": 5036797, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5974 + } + }, + { + "ph": "f", "id": 246, "pid": 2338707, "tid": 2379456, "ts": 6267454025961.410, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454025962.968, "dur": 143.127, + "args": { + "External id": 463192,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454025964.306, "dur": 21.816, + "args": { + "External id": 463193,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454025965.635, "dur": 2.483, + "args": { + "External id": 463194,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025968.835, "dur": 16.973, + "args": { + "External id": 463195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025969.780, "dur": 15.535, + "args": { + "External id": 463196,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454025987.360, "dur": 3.307, + "args": { + "External id": 463197,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454025989.610, "dur": 0.807, + "args": { + "External id": 463198,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454025993.808, "dur": 110.387, + "args": { + "External id": 463199,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454026114.516, "dur": 21.112, + "args": { + "External id": 463200,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454026145.572, "dur": 46.309, + "args": { + "External id": 463201,"Record function id": 0, "Sequence number": 5036796, "Fwd thread id": 1, "Ev Idx": 5984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454026146.914, "dur": 1.453, + "args": { + "External id": 463202,"Sequence number": 5036796, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5985 + } + }, + { + "ph": "f", "id": 247, "pid": 2338707, "tid": 2379456, "ts": 6267454026146.914, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454026151.008, "dur": 34.312, + "args": { + "External id": 463203,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454026153.670, "dur": 31.152, + "args": { + "External id": 463204,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454026160.610, "dur": 3.169, + "args": { + "External id": 463205,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454026196.995, "dur": 3378.002, + "args": { + "External id": 463206,"Record function id": 0, "Sequence number": 5036795, "Fwd thread id": 1, "Ev Idx": 5989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454026211.748, "dur": 3327.228, + "args": { + "External id": 463207,"Sequence number": 5036795, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5990 + } + }, + { + "ph": "f", "id": 248, "pid": 2338707, "tid": 2379456, "ts": 6267454026211.748, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454026247.410, "dur": 3.376, + "args": { + "External id": 463208,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454026253.765, "dur": 3045.092, + "args": { + "External id": 463209,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454026255.832, "dur": 3042.628, + "args": { + "External id": 463210,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454026259.475, "dur": 4.220, + "args": { + "External id": 463211,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454026264.856, "dur": 3032.329, + "args": { + "External id": 463212,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338707, "tid": 2379456, + "ts": 6267454029303.004, "dur": 0.512, + "args": { + "External id": 463213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029307.866, "dur": 3.081, + "args": { + "External id": 463214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029309.624, "dur": 1.177, + "args": { + "External id": 463215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2379456, + "ts": 6267454029316.750, "dur": 27.676, + "args": { + "External id": 463216,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338707, "tid": 2379456, + "ts": 6267454029350.768, "dur": 43.753, + "args": { + "External id": 463217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2379456, + "ts": 6267454029352.279, "dur": 41.984, + "args": { + "External id": 463218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2379456, + "ts": 6267454029353.721, "dur": 40.184, + "args": { + "External id": 463219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029551.002, "dur": 19.125, + "args": { + "External id": 463220,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 6003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454029590.705, "dur": 33.634, + "args": { + "External id": 463221,"Record function id": 0, "Ev Idx": 6004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454029609.752, "dur": 11.925, + "args": { + "External id": 463222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454029613.672, "dur": 6.567, + "args": { + "External id": 463223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454029615.658, "dur": 4.260, + "args": { + "External id": 463224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454029630.967, "dur": 17.249, + "args": { + "External id": 463225,"Record function id": 0, "Sequence number": 5036794, "Fwd thread id": 1, "Ev Idx": 6008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454029635.185, "dur": 10.578, + "args": { + "External id": 463226,"Sequence number": 5036794, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6009 + } + }, + { + "ph": "f", "id": 249, "pid": 2338707, "tid": 2379456, "ts": 6267454029635.185, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454029639.156, "dur": 6.357, + "args": { + "External id": 463227,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454029641.189, "dur": 4.155, + "args": { + "External id": 463228,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454029652.272, "dur": 88.915, + "args": { + "External id": 463229,"Record function id": 0, "Sequence number": 5036793, "Fwd thread id": 1, "Ev Idx": 6012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454029653.362, "dur": 83.177, + "args": { + "External id": 463230,"Sequence number": 5036793, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6013 + } + }, + { + "ph": "f", "id": 250, "pid": 2338707, "tid": 2379456, "ts": 6267454029653.362, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454029655.956, "dur": 80.013, + "args": { + "External id": 463231,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454029659.405, "dur": 32.302, + "args": { + "External id": 463232,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454029661.560, "dur": 5.058, + "args": { + "External id": 463233,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029668.039, "dur": 23.339, + "args": { + "External id": 463234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029669.967, "dur": 20.772, + "args": { + "External id": 463235,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454029693.759, "dur": 8.440, + "args": { + "External id": 463236,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 6019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454029700.259, "dur": 1.495, + "args": { + "External id": 463237,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 6020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029706.087, "dur": 29.048, + "args": { + "External id": 463238,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454029746.451, "dur": 65.302, + "args": { + "External id": 463239,"Record function id": 0, "Sequence number": 5036792, "Fwd thread id": 1, "Ev Idx": 6022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454029747.896, "dur": 60.359, + "args": { + "External id": 463240,"Sequence number": 5036792, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6023 + } + }, + { + "ph": "f", "id": 251, "pid": 2338707, "tid": 2379456, "ts": 6267454029747.896, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454029750.015, "dur": 57.792, + "args": { + "External id": 463241,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 6024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454029753.076, "dur": 21.592, + "args": { + "External id": 463242,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454029754.151, "dur": 3.175, + "args": { + "External id": 463243,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029758.121, "dur": 16.095, + "args": { + "External id": 463244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029759.009, "dur": 14.783, + "args": { + "External id": 463245,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267454029776.088, "dur": 11.855, + "args": { + "External id": 463246,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454029783.056, "dur": 4.200, + "args": { + "External id": 463247,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029788.916, "dur": 18.259, + "args": { + "External id": 463248,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454029816.644, "dur": 144.433, + "args": { + "External id": 463249,"Record function id": 0, "Sequence number": 5036791, "Fwd thread id": 1, "Ev Idx": 6032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454029817.748, "dur": 138.773, + "args": { + "External id": 463250,"Sequence number": 5036791, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6033 + } + }, + { + "ph": "f", "id": 252, "pid": 2338707, "tid": 2379456, "ts": 6267454029817.748, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454029819.960, "dur": 136.066, + "args": { + "External id": 463251,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 6034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454029821.987, "dur": 25.545, + "args": { + "External id": 463252,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454029823.623, "dur": 2.518, + "args": { + "External id": 463253,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029829.939, "dur": 17.221, + "args": { + "External id": 463254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029832.380, "dur": 14.344, + "args": { + "External id": 463255,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454029851.376, "dur": 3.915, + "args": { + "External id": 463256,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 6039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454029854.234, "dur": 0.805, + "args": { + "External id": 463257,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029856.167, "dur": 98.732, + "args": { + "External id": 463258,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454029966.672, "dur": 156.393, + "args": { + "External id": 463259,"Record function id": 0, "Sequence number": 5036790, "Fwd thread id": 1, "Ev Idx": 6042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454029968.161, "dur": 124.531, + "args": { + "External id": 463260,"Sequence number": 5036790, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6043 + } + }, + { + "ph": "f", "id": 253, "pid": 2338707, "tid": 2379456, "ts": 6267454029968.161, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454029970.308, "dur": 121.708, + "args": { + "External id": 463261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 6044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2379456, + "ts": 6267454029971.885, "dur": 18.873, + "args": { + "External id": 463262,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454029973.285, "dur": 2.569, + "args": { + "External id": 463263,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029976.856, "dur": 13.595, + "args": { + "External id": 463264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029978.457, "dur": 11.620, + "args": { + "External id": 463265,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454029991.857, "dur": 3.169, + "args": { + "External id": 463266,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 6049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454029994.260, "dur": 0.572, + "args": { + "External id": 463267,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454029998.465, "dur": 91.951, + "args": { + "External id": 463268,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454030100.362, "dur": 19.945, + "args": { + "External id": 463269,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454030132.949, "dur": 372.763, + "args": { + "External id": 463270,"Record function id": 0, "Sequence number": 5036789, "Fwd thread id": 1, "Ev Idx": 6053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454030134.912, "dur": 361.546, + "args": { + "External id": 463271,"Sequence number": 5036789, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6054 + } + }, + { + "ph": "f", "id": 254, "pid": 2338707, "tid": 2379456, "ts": 6267454030134.912, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454030313.427, "dur": 48.760, + "args": { + "External id": 463272,"kernel_hash": "cze4nwoznzp5ccj3536wygl2i2ykkdyt3hdmkvx2y6tl7tevwkdm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "131072", "4096", "1", "993", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ze/cze4nwoznzp5ccj3536wygl2i2ykkdyt3hdmkvx2y6tl7tevwkdm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [4096], [131072, 4096], [131072, 4096], [132, 4096], [131072], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454030399.114, "dur": 29.653, + "args": { + "External id": 463273,"kernel_hash": "ccswrdngzi6jo4g2p2v6rs3fdbvr7wkgz5izdhrj3lqu7uzzy4ad", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/cs/ccswrdngzi6jo4g2p2v6rs3fdbvr7wkgz5izdhrj3lqu7uzzy4ad.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 6056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454030447.841, "dur": 22.587, + "args": { + "External id": 463274,"kernel_hash": "cddm6binnqpaq4idvznth7jqun5cvp5pc6dr7dh3ezznaxoowmjo", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/dd/cddm6binnqpaq4idvznth7jqun5cvp5pc6dr7dh3ezznaxoowmjo.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 6057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454030516.610, "dur": 13.537, + "args": { + "External id": 463275,"Record function id": 0, "Ev Idx": 6058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454030519.428, "dur": 9.784, + "args": { + "External id": 463276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454030522.647, "dur": 5.708, + "args": { + "External id": 463277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454030524.111, "dur": 4.106, + "args": { + "External id": 463278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454030535.347, "dur": 40.226, + "args": { + "External id": 463279,"Record function id": 0, "Sequence number": 5036788, "Fwd thread id": 1, "Ev Idx": 6062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454030536.708, "dur": 31.507, + "args": { + "External id": 463280,"Sequence number": 5036788, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6063 + } + }, + { + "ph": "f", "id": 255, "pid": 2338707, "tid": 2379456, "ts": 6267454030536.708, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267454030539.241, "dur": 11.246, + "args": { + "External id": 463281,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454030546.396, "dur": 1.988, + "args": { + "External id": 463282,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267454030551.253, "dur": 4.308, + "args": { + "External id": 463283,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454030553.471, "dur": 0.444, + "args": { + "External id": 463284,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267454030556.002, "dur": 6.675, + "args": { + "External id": 463285,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454030560.901, "dur": 0.584, + "args": { + "External id": 463286,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2379456, + "ts": 6267454030563.253, "dur": 4.210, + "args": { + "External id": 463287,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454030565.652, "dur": 0.548, + "args": { + "External id": 463288,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454030580.180, "dur": 6.304, + "args": { + "External id": 463289,"Record function id": 0, "Sequence number": 5036787, "Fwd thread id": 1, "Ev Idx": 6072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454030581.136, "dur": 1.508, + "args": { + "External id": 463290,"Sequence number": 5036787, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6073 + } + }, + { + "ph": "f", "id": 256, "pid": 2338707, "tid": 2379456, "ts": 6267454030581.136, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454030609.651, "dur": 639.120, + "args": { + "External id": 463291,"Record function id": 0, "Sequence number": 5036786, "Fwd thread id": 1, "Ev Idx": 6074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454030612.517, "dur": 618.009, + "args": { + "External id": 463292,"Sequence number": 5036786, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6075 + } + }, + { + "ph": "f", "id": 257, "pid": 2338707, "tid": 2379456, "ts": 6267454030612.517, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454030655.928, "dur": 12.207, + "args": { + "External id": 463293,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454030662.161, "dur": 5.437, + "args": { + "External id": 463294,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454030672.637, "dur": 8.197, + "args": { + "External id": 463295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454030675.537, "dur": 4.265, + "args": { + "External id": 463296,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454030678.476, "dur": 1.046, + "args": { + "External id": 463297,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2379456, + "ts": 6267454030685.128, "dur": 153.548, + "args": { + "External id": 463298,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454030688.549, "dur": 7.008, + "args": { + "External id": 463299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454030689.529, "dur": 4.788, + "args": { + "External id": 463300,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454030691.104, "dur": 3.104, + "args": { + "External id": 463301,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2379456, + "ts": 6267454030700.643, "dur": 136.868, + "args": { + "External id": 463302,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454030702.892, "dur": 133.457, + "args": { + "External id": 463303,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267454030844.679, "dur": 5.749, + "args": { + "External id": 463304,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454030847.057, "dur": 3.256, + "args": { + "External id": 463305,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454030891.618, "dur": 7.523, + "args": { + "External id": 463306,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454030900.560, "dur": 2.723, + "args": { + "External id": 463307,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454030904.472, "dur": 2.657, + "args": { + "External id": 463308,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454030952.720, "dur": 3.035, + "args": { + "External id": 463309,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454030953.795, "dur": 1.781, + "args": { + "External id": 463310,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338707, "tid": 2379456, + "ts": 6267454030986.096, "dur": 214.899, + "args": { + "External id": 463311,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267454030992.493, "dur": 7.833, + "args": { + "External id": 463312,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454030997.764, "dur": 1.061, + "args": { + "External id": 463313,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454031002.238, "dur": 11.580, + "args": { + "External id": 463314,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031008.996, "dur": 3.402, + "args": { + "External id": 463315,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267454031015.866, "dur": 3.129, + "args": { + "External id": 463316,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031018.010, "dur": 0.507, + "args": { + "External id": 463317,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454031020.148, "dur": 4.205, + "args": { + "External id": 463318,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031022.758, "dur": 0.681, + "args": { + "External id": 463319,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454031029.128, "dur": 3.604, + "args": { + "External id": 463320,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031031.611, "dur": 0.743, + "args": { + "External id": 463321,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454031034.503, "dur": 7.179, + "args": { + "External id": 463322,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454031039.136, "dur": 2.312, + "args": { + "External id": 463323,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454031044.913, "dur": 2.955, + "args": { + "External id": 463324,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031047.062, "dur": 0.399, + "args": { + "External id": 463325,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454031049.041, "dur": 3.304, + "args": { + "External id": 463326,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454031050.362, "dur": 1.890, + "args": { + "External id": 463327,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454031097.244, "dur": 83.848, + "args": { + "External id": 463328,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454031184.610, "dur": 4.652, + "args": { + "External id": 463329,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454031190.458, "dur": 5.163, + "args": { + "External id": 463330,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031193.307, "dur": 1.238, + "args": { + "External id": 463331,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454031198.468, "dur": 1.090, + "args": { + "External id": 463332,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454031265.670, "dur": 13.452, + "args": { + "External id": 463333,"Record function id": 0, "Ev Idx": 6116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454031268.543, "dur": 9.699, + "args": { + "External id": 463334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454031272.360, "dur": 4.861, + "args": { + "External id": 463335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454031273.994, "dur": 3.074, + "args": { + "External id": 463336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031285.097, "dur": 9.893, + "args": { + "External id": 463337,"Record function id": 0, "Sequence number": 5036785, "Fwd thread id": 1, "Ev Idx": 6120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031286.736, "dur": 5.841, + "args": { + "External id": 463338,"Sequence number": 5036785, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6121 + } + }, + { + "ph": "f", "id": 258, "pid": 2338707, "tid": 2379456, "ts": 6267454031286.736, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454031289.527, "dur": 2.784, + "args": { + "External id": 463339,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454031290.859, "dur": 1.263, + "args": { + "External id": 463340,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031299.727, "dur": 159.395, + "args": { + "External id": 463341,"Record function id": 0, "Sequence number": 5036784, "Fwd thread id": 1, "Ev Idx": 6124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031300.717, "dur": 150.394, + "args": { + "External id": 463342,"Sequence number": 5036784, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6125 + } + }, + { + "ph": "f", "id": 259, "pid": 2338707, "tid": 2379456, "ts": 6267454031300.717, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454031304.503, "dur": 8.867, + "args": { + "External id": 463343,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454031309.120, "dur": 3.425, + "args": { + "External id": 463344,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031311.545, "dur": 0.770, + "args": { + "External id": 463345,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454031314.938, "dur": 62.464, + "args": { + "External id": 463346,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454031378.745, "dur": 5.359, + "args": { + "External id": 463347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454031379.829, "dur": 3.559, + "args": { + "External id": 463348,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031381.898, "dur": 1.307, + "args": { + "External id": 463349,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454031388.929, "dur": 4.208, + "args": { + "External id": 463350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454031390.300, "dur": 2.264, + "args": { + "External id": 463351,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031391.773, "dur": 0.647, + "args": { + "External id": 463352,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454031393.783, "dur": 56.090, + "args": { + "External id": 463353,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031465.165, "dur": 11.459, + "args": { + "External id": 463354,"Record function id": 0, "Sequence number": 5036783, "Fwd thread id": 1, "Ev Idx": 6137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031466.432, "dur": 8.216, + "args": { + "External id": 463355,"Sequence number": 5036783, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6138 + } + }, + { + "ph": "f", "id": 260, "pid": 2338707, "tid": 2379456, "ts": 6267454031466.432, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454031468.954, "dur": 5.510, + "args": { + "External id": 463356,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454031470.604, "dur": 3.673, + "args": { + "External id": 463357,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031480.825, "dur": 80.963, + "args": { + "External id": 463358,"Record function id": 0, "Sequence number": 5036782, "Fwd thread id": 1, "Ev Idx": 6141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031481.997, "dur": 76.970, + "args": { + "External id": 463359,"Sequence number": 5036782, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6142 + } + }, + { + "ph": "f", "id": 261, "pid": 2338707, "tid": 2379456, "ts": 6267454031481.997, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454031485.380, "dur": 73.304, + "args": { + "External id": 463360,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454031554.959, "dur": 3.052, + "args": { + "External id": 467969,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031557.136, "dur": 0.783, + "args": { + "External id": 467970,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454031566.886, "dur": 6.110, + "args": { + "External id": 467971,"Record function id": 0, "Ev Idx": 6146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454031568.665, "dur": 3.700, + "args": { + "External id": 467972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454031569.995, "dur": 2.035, + "args": { + "External id": 467973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454031570.538, "dur": 1.394, + "args": { + "External id": 467974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031577.331, "dur": 9.581, + "args": { + "External id": 467975,"Record function id": 0, "Sequence number": 5036781, "Fwd thread id": 1, "Ev Idx": 6150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031578.704, "dur": 5.867, + "args": { + "External id": 467976,"Sequence number": 5036781, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6151 + } + }, + { + "ph": "f", "id": 262, "pid": 2338707, "tid": 2379456, "ts": 6267454031578.704, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454031579.656, "dur": 4.722, + "args": { + "External id": 467977,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454031583.245, "dur": 0.975, + "args": { + "External id": 467978,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031591.090, "dur": 157.433, + "args": { + "External id": 467979,"Record function id": 0, "Sequence number": 5036780, "Fwd thread id": 1, "Ev Idx": 6154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031608.682, "dur": 129.655, + "args": { + "External id": 467980,"Sequence number": 5036780, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6155 + } + }, + { + "ph": "f", "id": 263, "pid": 2338707, "tid": 2379456, "ts": 6267454031608.682, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454031611.964, "dur": 4.748, + "args": { + "External id": 467981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454031612.998, "dur": 2.988, + "args": { + "External id": 467982,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031614.730, "dur": 0.957, + "args": { + "External id": 467983,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454031617.452, "dur": 61.395, + "args": { + "External id": 467984,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454031682.778, "dur": 3.752, + "args": { + "External id": 467985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454031683.281, "dur": 2.652, + "args": { + "External id": 467986,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031685.154, "dur": 0.635, + "args": { + "External id": 467987,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454031687.688, "dur": 3.939, + "args": { + "External id": 467988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454031688.737, "dur": 2.165, + "args": { + "External id": 467989,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031690.485, "dur": 0.337, + "args": { + "External id": 467990,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454031694.558, "dur": 42.952, + "args": { + "External id": 467991,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031755.894, "dur": 43.368, + "args": { + "External id": 467992,"Record function id": 0, "Sequence number": 5036779, "Fwd thread id": 1, "Ev Idx": 6167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031757.158, "dur": 5.209, + "args": { + "External id": 467993,"Sequence number": 5036779, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6168 + } + }, + { + "ph": "f", "id": 264, "pid": 2338707, "tid": 2379456, "ts": 6267454031757.158, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454031759.491, "dur": 2.720, + "args": { + "External id": 467994,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454031760.849, "dur": 1.213, + "args": { + "External id": 467995,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267454031766.192, "dur": 29.838, + "args": { + "External id": 467996,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031803.663, "dur": 10.874, + "args": { + "External id": 467997,"Record function id": 0, "Sequence number": 5036778, "Fwd thread id": 1, "Ev Idx": 6172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454031804.833, "dur": 7.133, + "args": { + "External id": 467998,"Sequence number": 5036778, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6173 + } + }, + { + "ph": "f", "id": 265, "pid": 2338707, "tid": 2379456, "ts": 6267454031804.833, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454031805.526, "dur": 6.211, + "args": { + "External id": 467999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454031806.303, "dur": 4.806, + "args": { + "External id": 468000,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454031810.252, "dur": 0.698, + "args": { + "External id": 468001,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454031820.112, "dur": 6.153, + "args": { + "External id": 468002,"Record function id": 0, "Ev Idx": 6177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454031821.562, "dur": 4.066, + "args": { + "External id": 468003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454031822.879, "dur": 2.389, + "args": { + "External id": 468004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454031823.647, "dur": 1.482, + "args": { + "External id": 468005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454031831.148, "dur": 547.059, + "args": { + "External id": 468006,"Record function id": 0, "Sequence number": 5036777, "Fwd thread id": 1, "Ev Idx": 6181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454031832.965, "dur": 527.696, + "args": { + "External id": 468007,"Sequence number": 5036777, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6182 + } + }, + { + "ph": "f", "id": 266, "pid": 2338707, "tid": 2379456, "ts": 6267454031832.965, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2379456, + "ts": 6267454031864.519, "dur": 41.073, + "args": { + "External id": 468008,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267454031866.623, "dur": 38.685, + "args": { + "External id": 468009,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454031870.091, "dur": 7.604, + "args": { + "External id": 468010,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454031873.216, "dur": 3.807, + "args": { + "External id": 468011,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454031879.463, "dur": 25.050, + "args": { + "External id": 468012,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454031921.701, "dur": 5.131, + "args": { + "External id": 468013,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454031925.324, "dur": 1.345, + "args": { + "External id": 468014,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454031931.346, "dur": 2.041, + "args": { + "External id": 468015,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454031932.413, "dur": 0.858, + "args": { + "External id": 468016,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454031948.957, "dur": 5.010, + "args": { + "External id": 468017,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454031967.899, "dur": 2.786, + "args": { + "External id": 468018,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454032224.886, "dur": 3.945, + "args": { + "External id": 468019,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454032234.529, "dur": 45.383, + "args": { + "External id": 468020,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454032250.115, "dur": 1.369, + "args": { + "External id": 468021,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454032287.051, "dur": 33.088, + "args": { + "External id": 468022,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454032289.062, "dur": 30.824, + "args": { + "External id": 468023,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454032294.570, "dur": 5.227, + "args": { + "External id": 468024,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454032301.487, "dur": 17.772, + "args": { + "External id": 468025,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267454032328.206, "dur": 3.251, + "args": { + "External id": 468026,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454032330.073, "dur": 1.226, + "args": { + "External id": 468027,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454032341.553, "dur": 3.087, + "args": { + "External id": 468028,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454032342.963, "dur": 1.549, + "args": { + "External id": 468029,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454032347.514, "dur": 2.882, + "args": { + "External id": 468030,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454032349.073, "dur": 1.221, + "args": { + "External id": 468031,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454032397.692, "dur": 11.319, + "args": { + "External id": 468032,"Record function id": 0, "Ev Idx": 6207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454032400.542, "dur": 7.726, + "args": { + "External id": 468033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454032403.615, "dur": 3.694, + "args": { + "External id": 468034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454032404.797, "dur": 2.401, + "args": { + "External id": 468035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454032413.640, "dur": 9.855, + "args": { + "External id": 468036,"Record function id": 0, "Sequence number": 5036776, "Fwd thread id": 1, "Ev Idx": 6211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454032415.006, "dur": 5.199, + "args": { + "External id": 468037,"Sequence number": 5036776, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6212 + } + }, + { + "ph": "f", "id": 267, "pid": 2338707, "tid": 2379456, "ts": 6267454032415.006, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454032417.021, "dur": 2.905, + "args": { + "External id": 468038,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454032418.167, "dur": 1.531, + "args": { + "External id": 468039,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454032428.009, "dur": 203.033, + "args": { + "External id": 468040,"Record function id": 0, "Sequence number": 5036775, "Fwd thread id": 1, "Ev Idx": 6215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454032429.067, "dur": 195.525, + "args": { + "External id": 468041,"Sequence number": 5036775, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6216 + } + }, + { + "ph": "f", "id": 268, "pid": 2338707, "tid": 2379456, "ts": 6267454032429.067, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454032435.828, "dur": 6.054, + "args": { + "External id": 468042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454032437.954, "dur": 3.205, + "args": { + "External id": 468043,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454032439.861, "dur": 1.066, + "args": { + "External id": 468044,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454032443.517, "dur": 92.835, + "args": { + "External id": 468045,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454032538.163, "dur": 5.244, + "args": { + "External id": 468046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454032539.067, "dur": 3.509, + "args": { + "External id": 468047,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454032541.259, "dur": 1.123, + "args": { + "External id": 468048,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454032548.094, "dur": 5.931, + "args": { + "External id": 468049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454032548.877, "dur": 4.545, + "args": { + "External id": 468050,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454032550.440, "dur": 2.847, + "args": { + "External id": 468051,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454032554.925, "dur": 68.032, + "args": { + "External id": 468052,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454032640.588, "dur": 10.249, + "args": { + "External id": 468053,"Record function id": 0, "Sequence number": 5036774, "Fwd thread id": 1, "Ev Idx": 6228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454032642.087, "dur": 5.943, + "args": { + "External id": 468054,"Sequence number": 5036774, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6229 + } + }, + { + "ph": "f", "id": 269, "pid": 2338707, "tid": 2379456, "ts": 6267454032642.087, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454032644.429, "dur": 3.394, + "args": { + "External id": 468055,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454032645.870, "dur": 1.835, + "args": { + "External id": 468056,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454032654.862, "dur": 12.137, + "args": { + "External id": 468057,"Record function id": 0, "Sequence number": 5036773, "Fwd thread id": 1, "Ev Idx": 6232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454032656.174, "dur": 8.400, + "args": { + "External id": 468058,"Sequence number": 5036773, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6233 + } + }, + { + "ph": "f", "id": 270, "pid": 2338707, "tid": 2379456, "ts": 6267454032656.174, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454032659.993, "dur": 4.324, + "args": { + "External id": 468059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454032661.202, "dur": 2.533, + "args": { + "External id": 468060,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454032662.934, "dur": 0.637, + "args": { + "External id": 468061,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454032672.372, "dur": 8.002, + "args": { + "External id": 468062,"Record function id": 0, "Ev Idx": 6237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454032674.215, "dur": 5.446, + "args": { + "External id": 468063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454032676.471, "dur": 2.841, + "args": { + "External id": 468064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454032677.279, "dur": 1.895, + "args": { + "External id": 468065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454032684.493, "dur": 10.088, + "args": { + "External id": 468066,"Record function id": 0, "Sequence number": 5036772, "Fwd thread id": 1, "Ev Idx": 6241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454032685.502, "dur": 6.612, + "args": { + "External id": 468067,"Sequence number": 5036772, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6242 + } + }, + { + "ph": "f", "id": 271, "pid": 2338707, "tid": 2379456, "ts": 6267454032685.502, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454032687.004, "dur": 4.917, + "args": { + "External id": 468068,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454032690.519, "dur": 1.258, + "args": { + "External id": 468069,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454032699.914, "dur": 492.308, + "args": { + "External id": 468070,"Record function id": 0, "Sequence number": 5036771, "Fwd thread id": 1, "Ev Idx": 6245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454032701.939, "dur": 467.204, + "args": { + "External id": 468071,"Sequence number": 5036771, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6246 + } + }, + { + "ph": "f", "id": 272, "pid": 2338707, "tid": 2379456, "ts": 6267454032701.939, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454032723.398, "dur": 9.868, + "args": { + "External id": 468072,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454032726.825, "dur": 5.915, + "args": { + "External id": 468073,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454032736.019, "dur": 4.507, + "args": { + "External id": 468074,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454032738.066, "dur": 2.262, + "args": { + "External id": 468075,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454032741.883, "dur": 7.170, + "args": { + "External id": 468076,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454032743.949, "dur": 4.913, + "args": { + "External id": 468077,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454032788.025, "dur": 345.864, + "args": { + "External id": 468078,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454032884.017, "dur": 6.760, + "args": { + "External id": 468079,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454032895.537, "dur": 3.463, + "args": { + "External id": 468080,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454032900.801, "dur": 4.050, + "args": { + "External id": 468081,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454032906.328, "dur": 3.488, + "args": { + "External id": 468082,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454032969.880, "dur": 3.569, + "args": { + "External id": 468083,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454032971.433, "dur": 1.819, + "args": { + "External id": 468084,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454032975.337, "dur": 33.007, + "args": { + "External id": 468085,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454032981.997, "dur": 0.920, + "args": { + "External id": 468086,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454033010.385, "dur": 2.211, + "args": { + "External id": 468087,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454033011.759, "dur": 0.743, + "args": { + "External id": 468088,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454033016.043, "dur": 19.787, + "args": { + "External id": 468089,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454033020.212, "dur": 0.506, + "args": { + "External id": 468090,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454033153.420, "dur": 4.893, + "args": { + "External id": 468091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454033162.112, "dur": 1.090, + "args": { + "External id": 468092,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454033165.355, "dur": 0.665, + "args": { + "External id": 468093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454033205.842, "dur": 284.968, + "args": { + "External id": 468094,"Record function id": 0, "Sequence number": 5036770, "Fwd thread id": 1, "Ev Idx": 6269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454033208.303, "dur": 274.698, + "args": { + "External id": 468095,"Sequence number": 5036770, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6270 + } + }, + { + "ph": "f", "id": 273, "pid": 2338707, "tid": 2379456, "ts": 6267454033208.303, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267454033234.199, "dur": 63.433, + "args": { + "External id": 468096,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454033238.517, "dur": 5.393, + "args": { + "External id": 468097,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454033246.071, "dur": 50.832, + "args": { + "External id": 468098,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454033310.415, "dur": 5.942, + "args": { + "External id": 468099,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454033313.698, "dur": 2.211, + "args": { + "External id": 468100,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454033499.621, "dur": 231.321, + "args": { + "External id": 468101,"Record function id": 0, "Sequence number": 5036769, "Fwd thread id": 1, "Ev Idx": 6276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454033501.413, "dur": 220.711, + "args": { + "External id": 468102,"Sequence number": 5036769, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6277 + } + }, + { + "ph": "f", "id": 274, "pid": 2338707, "tid": 2379456, "ts": 6267454033501.413, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267454033515.563, "dur": 54.374, + "args": { + "External id": 468103,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454033519.172, "dur": 3.479, + "args": { + "External id": 468104,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454033524.031, "dur": 45.205, + "args": { + "External id": 468105,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454033578.992, "dur": 6.045, + "args": { + "External id": 468106,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454033581.550, "dur": 3.127, + "args": { + "External id": 468107,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454033741.407, "dur": 18.214, + "args": { + "External id": 468108,"Record function id": 0, "Sequence number": 5036768, "Fwd thread id": 1, "Ev Idx": 6283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454033745.454, "dur": 11.006, + "args": { + "External id": 468109,"Sequence number": 5036768, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6284 + } + }, + { + "ph": "f", "id": 275, "pid": 2338707, "tid": 2379456, "ts": 6267454033745.454, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454033748.803, "dur": 7.262, + "args": { + "External id": 468110,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454033750.270, "dur": 5.537, + "args": { + "External id": 468111,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454033763.998, "dur": 7.570, + "args": { + "External id": 468112,"Record function id": 0, "Sequence number": 5036767, "Fwd thread id": 1, "Ev Idx": 6287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454033765.146, "dur": 4.330, + "args": { + "External id": 468113,"Sequence number": 5036767, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6288 + } + }, + { + "ph": "f", "id": 276, "pid": 2338707, "tid": 2379456, "ts": 6267454033765.146, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454033767.070, "dur": 2.203, + "args": { + "External id": 468114,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454033767.931, "dur": 1.197, + "args": { + "External id": 468115,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454033775.576, "dur": 13.704, + "args": { + "External id": 468116,"Record function id": 0, "Sequence number": 5036766, "Fwd thread id": 1, "Ev Idx": 6291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454033780.827, "dur": 6.288, + "args": { + "External id": 468117,"Sequence number": 5036766, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6292 + } + }, + { + "ph": "f", "id": 277, "pid": 2338707, "tid": 2379456, "ts": 6267454033780.827, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454033782.229, "dur": 4.696, + "args": { + "External id": 468118,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454033785.731, "dur": 1.046, + "args": { + "External id": 468119,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454033793.641, "dur": 9.850, + "args": { + "External id": 468120,"Record function id": 0, "Sequence number": 5036765, "Fwd thread id": 1, "Ev Idx": 6295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454033794.495, "dur": 6.168, + "args": { + "External id": 468121,"Sequence number": 5036765, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6296 + } + }, + { + "ph": "f", "id": 278, "pid": 2338707, "tid": 2379456, "ts": 6267454033794.495, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454033795.892, "dur": 4.583, + "args": { + "External id": 468122,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454033796.413, "dur": 3.960, + "args": { + "External id": 468123,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454033807.657, "dur": 187.348, + "args": { + "External id": 468124,"Record function id": 0, "Sequence number": 5036764, "Fwd thread id": 1, "Ev Idx": 6299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454033808.793, "dur": 178.475, + "args": { + "External id": 468125,"Sequence number": 5036764, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6300 + } + }, + { + "ph": "f", "id": 279, "pid": 2338707, "tid": 2379456, "ts": 6267454033808.793, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454033812.990, "dur": 11.176, + "args": { + "External id": 468126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454033815.319, "dur": 8.017, + "args": { + "External id": 468127,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454033821.278, "dur": 1.744, + "args": { + "External id": 468128,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454033826.237, "dur": 89.288, + "args": { + "External id": 468129,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454033917.033, "dur": 5.163, + "args": { + "External id": 468130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454033917.704, "dur": 3.335, + "args": { + "External id": 468131,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454033919.884, "dur": 1.029, + "args": { + "External id": 468132,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454033924.150, "dur": 5.505, + "args": { + "External id": 468133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454033927.536, "dur": 1.562, + "args": { + "External id": 468134,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454033928.573, "dur": 0.423, + "args": { + "External id": 468135,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454033930.638, "dur": 55.738, + "args": { + "External id": 468136,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034000.998, "dur": 7.180, + "args": { + "External id": 468137,"Record function id": 0, "Sequence number": 5036763, "Fwd thread id": 1, "Ev Idx": 6312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034002.163, "dur": 4.324, + "args": { + "External id": 468138,"Sequence number": 5036763, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6313 + } + }, + { + "ph": "f", "id": 280, "pid": 2338707, "tid": 2379456, "ts": 6267454034002.163, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454034003.664, "dur": 2.653, + "args": { + "External id": 468139,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454034004.931, "dur": 1.259, + "args": { + "External id": 468140,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034012.454, "dur": 10.424, + "args": { + "External id": 468141,"Record function id": 0, "Sequence number": 5036762, "Fwd thread id": 1, "Ev Idx": 6316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034015.869, "dur": 4.658, + "args": { + "External id": 468142,"Sequence number": 5036762, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6317 + } + }, + { + "ph": "f", "id": 281, "pid": 2338707, "tid": 2379456, "ts": 6267454034015.869, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454034016.810, "dur": 3.457, + "args": { + "External id": 468143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454034017.571, "dur": 2.004, + "args": { + "External id": 468144,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454034018.980, "dur": 0.505, + "args": { + "External id": 468145,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454034029.978, "dur": 13.185, + "args": { + "External id": 468146,"Record function id": 0, "Ev Idx": 6321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454034031.693, "dur": 10.460, + "args": { + "External id": 468147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454034034.520, "dur": 7.155, + "args": { + "External id": 468148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454034036.177, "dur": 5.388, + "args": { + "External id": 468149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034047.705, "dur": 54.519, + "args": { + "External id": 468150,"Record function id": 0, "Sequence number": 5036761, "Fwd thread id": 1, "Ev Idx": 6325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034049.124, "dur": 49.074, + "args": { + "External id": 468151,"Sequence number": 5036761, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6326 + } + }, + { + "ph": "f", "id": 282, "pid": 2338707, "tid": 2379456, "ts": 6267454034049.124, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454034050.468, "dur": 47.508, + "args": { + "External id": 468152,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454034095.568, "dur": 2.061, + "args": { + "External id": 468153,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034108.559, "dur": 123.918, + "args": { + "External id": 468154,"Record function id": 0, "Sequence number": 5036760, "Fwd thread id": 1, "Ev Idx": 6329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034109.603, "dur": 113.854, + "args": { + "External id": 468155,"Sequence number": 5036760, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6330 + } + }, + { + "ph": "f", "id": 283, "pid": 2338707, "tid": 2379456, "ts": 6267454034109.603, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454034112.837, "dur": 4.449, + "args": { + "External id": 468156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454034114.168, "dur": 2.590, + "args": { + "External id": 468157,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454034115.968, "dur": 0.662, + "args": { + "External id": 468158,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454034120.623, "dur": 42.282, + "args": { + "External id": 468159,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454034164.297, "dur": 8.581, + "args": { + "External id": 468160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454034165.221, "dur": 6.882, + "args": { + "External id": 468161,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454034169.571, "dur": 2.353, + "args": { + "External id": 468162,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454034174.125, "dur": 6.147, + "args": { + "External id": 468163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454034174.953, "dur": 4.786, + "args": { + "External id": 468164,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454034178.956, "dur": 0.654, + "args": { + "External id": 468165,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454034180.952, "dur": 41.453, + "args": { + "External id": 468166,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034238.429, "dur": 42.740, + "args": { + "External id": 468167,"Record function id": 0, "Sequence number": 5036759, "Fwd thread id": 1, "Ev Idx": 6342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034239.959, "dur": 5.078, + "args": { + "External id": 468168,"Sequence number": 5036759, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6343 + } + }, + { + "ph": "f", "id": 284, "pid": 2338707, "tid": 2379456, "ts": 6267454034239.959, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454034242.093, "dur": 2.769, + "args": { + "External id": 468169,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454034243.189, "dur": 1.562, + "args": { + "External id": 468170,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267454034248.865, "dur": 29.546, + "args": { + "External id": 468171,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034286.057, "dur": 12.421, + "args": { + "External id": 468172,"Record function id": 0, "Sequence number": 5036758, "Fwd thread id": 1, "Ev Idx": 6347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034287.363, "dur": 8.825, + "args": { + "External id": 468173,"Sequence number": 5036758, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6348 + } + }, + { + "ph": "f", "id": 285, "pid": 2338707, "tid": 2379456, "ts": 6267454034287.363, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454034291.215, "dur": 4.757, + "args": { + "External id": 468174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454034292.564, "dur": 2.745, + "args": { + "External id": 468175,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454034294.343, "dur": 0.826, + "args": { + "External id": 468176,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454034303.490, "dur": 7.925, + "args": { + "External id": 468177,"Record function id": 0, "Ev Idx": 6352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454034305.381, "dur": 5.347, + "args": { + "External id": 468178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454034307.361, "dur": 2.991, + "args": { + "External id": 468179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454034308.272, "dur": 1.960, + "args": { + "External id": 468180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034315.632, "dur": 10.143, + "args": { + "External id": 468181,"Record function id": 0, "Sequence number": 5036757, "Fwd thread id": 1, "Ev Idx": 6356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034317.188, "dur": 6.381, + "args": { + "External id": 468182,"Sequence number": 5036757, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6357 + } + }, + { + "ph": "f", "id": 286, "pid": 2338707, "tid": 2379456, "ts": 6267454034317.188, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454034318.182, "dur": 5.218, + "args": { + "External id": 468183,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454034322.232, "dur": 1.019, + "args": { + "External id": 468184,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034329.944, "dur": 113.598, + "args": { + "External id": 468185,"Record function id": 0, "Sequence number": 5036756, "Fwd thread id": 1, "Ev Idx": 6360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034330.858, "dur": 103.068, + "args": { + "External id": 468186,"Sequence number": 5036756, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6361 + } + }, + { + "ph": "f", "id": 287, "pid": 2338707, "tid": 2379456, "ts": 6267454034330.858, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454034332.956, "dur": 3.589, + "args": { + "External id": 468187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454034333.953, "dur": 2.018, + "args": { + "External id": 468188,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454034335.313, "dur": 0.546, + "args": { + "External id": 468189,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454034337.384, "dur": 40.261, + "args": { + "External id": 468190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454034379.136, "dur": 7.362, + "args": { + "External id": 468191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454034382.623, "dur": 3.225, + "args": { + "External id": 468192,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454034384.693, "dur": 1.025, + "args": { + "External id": 468193,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454034387.825, "dur": 4.053, + "args": { + "External id": 468194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454034388.778, "dur": 2.560, + "args": { + "External id": 468195,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454034390.572, "dur": 0.691, + "args": { + "External id": 468196,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454034392.398, "dur": 40.616, + "args": { + "External id": 468197,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034449.136, "dur": 30.424, + "args": { + "External id": 468198,"Record function id": 0, "Sequence number": 5036755, "Fwd thread id": 1, "Ev Idx": 6373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034453.221, "dur": 4.652, + "args": { + "External id": 468199,"Sequence number": 5036755, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6374 + } + }, + { + "ph": "f", "id": 288, "pid": 2338707, "tid": 2379456, "ts": 6267454034453.221, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454034455.153, "dur": 2.550, + "args": { + "External id": 468200,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454034456.354, "dur": 1.195, + "args": { + "External id": 468201,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454034460.354, "dur": 16.844, + "args": { + "External id": 468202,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034483.840, "dur": 10.328, + "args": { + "External id": 468203,"Record function id": 0, "Sequence number": 5036754, "Fwd thread id": 1, "Ev Idx": 6378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454034484.789, "dur": 7.350, + "args": { + "External id": 468204,"Sequence number": 5036754, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6379 + } + }, + { + "ph": "f", "id": 289, "pid": 2338707, "tid": 2379456, "ts": 6267454034484.789, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454034485.936, "dur": 6.016, + "args": { + "External id": 468205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454034486.865, "dur": 4.381, + "args": { + "External id": 468206,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454034490.503, "dur": 0.622, + "args": { + "External id": 468207,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454034498.993, "dur": 5.440, + "args": { + "External id": 468208,"Record function id": 0, "Ev Idx": 6383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454034500.417, "dur": 3.350, + "args": { + "External id": 468209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454034501.520, "dur": 1.655, + "args": { + "External id": 468210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454034502.009, "dur": 1.020, + "args": { + "External id": 468211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454034510.193, "dur": 459.594, + "args": { + "External id": 468212,"Record function id": 0, "Sequence number": 5036753, "Fwd thread id": 1, "Ev Idx": 6387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454034512.215, "dur": 416.052, + "args": { + "External id": 468213,"Sequence number": 5036753, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6388 + } + }, + { + "ph": "f", "id": 290, "pid": 2338707, "tid": 2379456, "ts": 6267454034512.215, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454034551.705, "dur": 2.886, + "args": { + "External id": 468214,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454034553.108, "dur": 1.313, + "args": { + "External id": 468215,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454034572.450, "dur": 4.783, + "args": { + "External id": 468216,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454034590.929, "dur": 23.992, + "args": { + "External id": 468217,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454034804.900, "dur": 3.199, + "args": { + "External id": 468218,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454034812.965, "dur": 45.024, + "args": { + "External id": 468219,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454034827.410, "dur": 3.364, + "args": { + "External id": 468220,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454034865.175, "dur": 33.861, + "args": { + "External id": 468221,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454034867.288, "dur": 31.521, + "args": { + "External id": 468222,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454034872.489, "dur": 5.249, + "args": { + "External id": 468223,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454034879.519, "dur": 18.623, + "args": { + "External id": 468224,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267454034904.167, "dur": 5.647, + "args": { + "External id": 468225,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454034908.512, "dur": 1.190, + "args": { + "External id": 468226,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454034917.814, "dur": 2.467, + "args": { + "External id": 468227,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454034919.171, "dur": 1.006, + "args": { + "External id": 468228,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267454034942.172, "dur": 19.121, + "args": { + "External id": 468229,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454034986.098, "dur": 11.287, + "args": { + "External id": 468230,"Record function id": 0, "Ev Idx": 6405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454034989.112, "dur": 7.394, + "args": { + "External id": 468231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454034992.054, "dur": 3.453, + "args": { + "External id": 468232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454034993.140, "dur": 2.201, + "args": { + "External id": 468233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035002.311, "dur": 6.718, + "args": { + "External id": 468234,"Record function id": 0, "Sequence number": 5036752, "Fwd thread id": 1, "Ev Idx": 6409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035003.837, "dur": 1.625, + "args": { + "External id": 468235,"Sequence number": 5036752, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6410 + } + }, + { + "ph": "f", "id": 291, "pid": 2338707, "tid": 2379456, "ts": 6267454035003.837, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454035013.883, "dur": 544.039, + "args": { + "External id": 468236,"Record function id": 0, "Sequence number": 5036751, "Fwd thread id": 1, "Ev Idx": 6411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454035015.751, "dur": 528.402, + "args": { + "External id": 468237,"Sequence number": 5036751, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6412 + } + }, + { + "ph": "f", "id": 292, "pid": 2338707, "tid": 2379456, "ts": 6267454035015.751, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454035052.995, "dur": 55.382, + "args": { + "External id": 468238,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454035102.883, "dur": 4.875, + "args": { + "External id": 468239,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454035113.144, "dur": 7.035, + "args": { + "External id": 468240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454035115.334, "dur": 3.937, + "args": { + "External id": 468241,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035117.708, "dur": 1.290, + "args": { + "External id": 468242,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2379456, + "ts": 6267454035124.364, "dur": 118.642, + "args": { + "External id": 468243,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454035125.669, "dur": 5.928, + "args": { + "External id": 468244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454035128.943, "dur": 2.005, + "args": { + "External id": 468245,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035130.226, "dur": 0.616, + "args": { + "External id": 468246,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2379456, + "ts": 6267454035132.945, "dur": 109.273, + "args": { + "External id": 468247,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454035134.743, "dur": 106.341, + "args": { + "External id": 468248,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267454035248.060, "dur": 3.937, + "args": { + "External id": 468249,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454035249.743, "dur": 2.085, + "args": { + "External id": 468250,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454035293.709, "dur": 8.162, + "args": { + "External id": 468251,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454035303.637, "dur": 3.069, + "args": { + "External id": 468252,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454035308.088, "dur": 2.326, + "args": { + "External id": 468253,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454035349.414, "dur": 3.163, + "args": { + "External id": 468254,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454035351.023, "dur": 1.380, + "args": { + "External id": 468255,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338707, "tid": 2379456, + "ts": 6267454035379.335, "dur": 140.635, + "args": { + "External id": 468256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267454035385.943, "dur": 6.687, + "args": { + "External id": 468257,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035390.381, "dur": 1.407, + "args": { + "External id": 468258,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454035394.861, "dur": 6.864, + "args": { + "External id": 468259,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035400.324, "dur": 0.524, + "args": { + "External id": 468260,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267454035403.316, "dur": 2.912, + "args": { + "External id": 468261,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035405.394, "dur": 0.417, + "args": { + "External id": 468262,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454035407.258, "dur": 3.299, + "args": { + "External id": 468263,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035409.329, "dur": 0.620, + "args": { + "External id": 468264,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454035414.834, "dur": 6.160, + "args": { + "External id": 468265,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035417.593, "dur": 3.032, + "args": { + "External id": 468266,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454035422.388, "dur": 7.090, + "args": { + "External id": 468267,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454035426.966, "dur": 2.322, + "args": { + "External id": 468268,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454035430.509, "dur": 3.204, + "args": { + "External id": 468269,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035432.471, "dur": 0.883, + "args": { + "External id": 468270,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454035437.017, "dur": 2.865, + "args": { + "External id": 468271,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454035438.437, "dur": 1.351, + "args": { + "External id": 468272,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454035441.100, "dur": 64.988, + "args": { + "External id": 468273,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454035508.454, "dur": 1.351, + "args": { + "External id": 468274,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454035510.977, "dur": 4.059, + "args": { + "External id": 468275,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035513.516, "dur": 0.785, + "args": { + "External id": 468276,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454035517.528, "dur": 1.128, + "args": { + "External id": 468277,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454035572.031, "dur": 11.278, + "args": { + "External id": 468278,"Record function id": 0, "Ev Idx": 6453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454035574.614, "dur": 8.000, + "args": { + "External id": 468279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454035577.317, "dur": 4.247, + "args": { + "External id": 468280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454035578.737, "dur": 2.657, + "args": { + "External id": 468281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035588.289, "dur": 33.344, + "args": { + "External id": 468282,"Record function id": 0, "Sequence number": 5036750, "Fwd thread id": 1, "Ev Idx": 6457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035589.711, "dur": 28.085, + "args": { + "External id": 468283,"Sequence number": 5036750, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6458 + } + }, + { + "ph": "f", "id": 293, "pid": 2338707, "tid": 2379456, "ts": 6267454035589.711, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454035613.521, "dur": 3.993, + "args": { + "External id": 468284,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454035615.458, "dur": 1.770, + "args": { + "External id": 468285,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035628.251, "dur": 142.674, + "args": { + "External id": 468286,"Record function id": 0, "Sequence number": 5036749, "Fwd thread id": 1, "Ev Idx": 6461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035629.293, "dur": 133.957, + "args": { + "External id": 468287,"Sequence number": 5036749, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6462 + } + }, + { + "ph": "f", "id": 294, "pid": 2338707, "tid": 2379456, "ts": 6267454035629.293, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454035633.439, "dur": 7.647, + "args": { + "External id": 468288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454035637.833, "dur": 2.536, + "args": { + "External id": 468289,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035639.424, "dur": 0.702, + "args": { + "External id": 468290,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454035642.309, "dur": 58.591, + "args": { + "External id": 468291,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454035702.414, "dur": 5.369, + "args": { + "External id": 468292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454035703.371, "dur": 3.650, + "args": { + "External id": 468293,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035705.403, "dur": 1.411, + "args": { + "External id": 468294,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454035712.374, "dur": 6.782, + "args": { + "External id": 468295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454035713.434, "dur": 5.156, + "args": { + "External id": 468296,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035715.400, "dur": 3.090, + "args": { + "External id": 468297,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454035719.689, "dur": 42.676, + "args": { + "External id": 468298,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035777.068, "dur": 8.342, + "args": { + "External id": 468299,"Record function id": 0, "Sequence number": 5036748, "Fwd thread id": 1, "Ev Idx": 6474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035778.351, "dur": 5.054, + "args": { + "External id": 468300,"Sequence number": 5036748, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6475 + } + }, + { + "ph": "f", "id": 295, "pid": 2338707, "tid": 2379456, "ts": 6267454035778.351, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454035780.102, "dur": 3.117, + "args": { + "External id": 468301,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454035781.601, "dur": 1.443, + "args": { + "External id": 468302,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035789.513, "dur": 11.519, + "args": { + "External id": 468303,"Record function id": 0, "Sequence number": 5036747, "Fwd thread id": 1, "Ev Idx": 6478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035793.243, "dur": 5.240, + "args": { + "External id": 468304,"Sequence number": 5036747, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6479 + } + }, + { + "ph": "f", "id": 296, "pid": 2338707, "tid": 2379456, "ts": 6267454035793.243, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454035794.448, "dur": 3.786, + "args": { + "External id": 468305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454035795.226, "dur": 2.404, + "args": { + "External id": 468306,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035796.719, "dur": 0.809, + "args": { + "External id": 468307,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454035806.162, "dur": 6.975, + "args": { + "External id": 468308,"Record function id": 0, "Ev Idx": 6483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454035807.900, "dur": 4.558, + "args": { + "External id": 468309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454035809.524, "dur": 2.602, + "args": { + "External id": 468310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454035810.391, "dur": 1.581, + "args": { + "External id": 468311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035817.388, "dur": 9.042, + "args": { + "External id": 468312,"Record function id": 0, "Sequence number": 5036746, "Fwd thread id": 1, "Ev Idx": 6487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035818.850, "dur": 5.401, + "args": { + "External id": 468313,"Sequence number": 5036746, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6488 + } + }, + { + "ph": "f", "id": 297, "pid": 2338707, "tid": 2379456, "ts": 6267454035818.850, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454035819.696, "dur": 4.378, + "args": { + "External id": 468314,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454035823.081, "dur": 0.875, + "args": { + "External id": 468315,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035830.424, "dur": 113.897, + "args": { + "External id": 468316,"Record function id": 0, "Sequence number": 5036745, "Fwd thread id": 1, "Ev Idx": 6491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035831.357, "dur": 104.429, + "args": { + "External id": 468317,"Sequence number": 5036745, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6492 + } + }, + { + "ph": "f", "id": 298, "pid": 2338707, "tid": 2379456, "ts": 6267454035831.357, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454035833.336, "dur": 3.914, + "args": { + "External id": 468318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454035834.434, "dur": 2.300, + "args": { + "External id": 468319,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035835.783, "dur": 0.780, + "args": { + "External id": 468320,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454035840.545, "dur": 38.977, + "args": { + "External id": 468321,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454035881.030, "dur": 6.944, + "args": { + "External id": 468322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454035882.092, "dur": 5.249, + "args": { + "External id": 468323,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035883.467, "dur": 3.630, + "args": { + "External id": 468324,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454035889.571, "dur": 6.247, + "args": { + "External id": 468325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454035890.697, "dur": 4.423, + "args": { + "External id": 468326,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035894.710, "dur": 0.327, + "args": { + "External id": 468327,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454035896.697, "dur": 38.223, + "args": { + "External id": 468328,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035949.938, "dur": 36.741, + "args": { + "External id": 468329,"Record function id": 0, "Sequence number": 5036744, "Fwd thread id": 1, "Ev Idx": 6504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035950.941, "dur": 4.998, + "args": { + "External id": 468330,"Sequence number": 5036744, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6505 + } + }, + { + "ph": "f", "id": 299, "pid": 2338707, "tid": 2379456, "ts": 6267454035950.941, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454035952.856, "dur": 2.891, + "args": { + "External id": 468331,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454035954.475, "dur": 1.103, + "args": { + "External id": 468332,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267454035958.777, "dur": 24.563, + "args": { + "External id": 468333,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035991.327, "dur": 10.988, + "args": { + "External id": 468334,"Record function id": 0, "Sequence number": 5036743, "Fwd thread id": 1, "Ev Idx": 6509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454035992.442, "dur": 7.835, + "args": { + "External id": 468335,"Sequence number": 5036743, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6510 + } + }, + { + "ph": "f", "id": 300, "pid": 2338707, "tid": 2379456, "ts": 6267454035992.442, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454035993.623, "dur": 6.419, + "args": { + "External id": 468336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454035994.555, "dur": 4.838, + "args": { + "External id": 468337,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454035998.696, "dur": 0.537, + "args": { + "External id": 468338,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454036006.904, "dur": 5.918, + "args": { + "External id": 468339,"Record function id": 0, "Ev Idx": 6514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454036008.553, "dur": 3.644, + "args": { + "External id": 468340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454036009.937, "dur": 1.852, + "args": { + "External id": 468341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454036010.574, "dur": 1.130, + "args": { + "External id": 468342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454036018.171, "dur": 546.828, + "args": { + "External id": 468343,"Record function id": 0, "Sequence number": 5036742, "Fwd thread id": 1, "Ev Idx": 6518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454036019.852, "dur": 502.932, + "args": { + "External id": 468344,"Sequence number": 5036742, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6519 + } + }, + { + "ph": "f", "id": 301, "pid": 2338707, "tid": 2379456, "ts": 6267454036019.852, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2379456, + "ts": 6267454036047.090, "dur": 87.625, + "args": { + "External id": 468345,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267454036048.758, "dur": 85.619, + "args": { + "External id": 468346,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454036052.476, "dur": 49.577, + "args": { + "External id": 468347,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454036095.554, "dur": 5.383, + "args": { + "External id": 468348,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454036104.131, "dur": 29.400, + "args": { + "External id": 468349,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454036150.018, "dur": 8.842, + "args": { + "External id": 468350,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454036153.823, "dur": 4.891, + "args": { + "External id": 468351,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454036163.670, "dur": 2.584, + "args": { + "External id": 468352,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454036165.084, "dur": 1.073, + "args": { + "External id": 468353,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454036182.371, "dur": 2.887, + "args": { + "External id": 468354,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454036200.893, "dur": 2.849, + "args": { + "External id": 468355,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454036395.753, "dur": 3.883, + "args": { + "External id": 468356,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454036404.909, "dur": 38.735, + "args": { + "External id": 468357,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454036416.037, "dur": 1.146, + "args": { + "External id": 468358,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454036449.979, "dur": 32.776, + "args": { + "External id": 468359,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454036451.848, "dur": 30.684, + "args": { + "External id": 468360,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454036456.502, "dur": 5.043, + "args": { + "External id": 468361,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454036462.948, "dur": 19.015, + "args": { + "External id": 468362,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267454036490.321, "dur": 2.964, + "args": { + "External id": 468363,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454036492.024, "dur": 1.113, + "args": { + "External id": 468364,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454036501.514, "dur": 5.818, + "args": { + "External id": 468365,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454036502.892, "dur": 4.294, + "args": { + "External id": 468366,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454036510.180, "dur": 2.001, + "args": { + "External id": 468367,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454036511.158, "dur": 0.928, + "args": { + "External id": 468368,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454036542.404, "dur": 20.468, + "args": { + "External id": 468369,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454036580.923, "dur": 30.855, + "args": { + "External id": 468370,"Record function id": 0, "Ev Idx": 6545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454036583.484, "dur": 7.845, + "args": { + "External id": 468371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454036586.232, "dur": 4.062, + "args": { + "External id": 468372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454036587.484, "dur": 2.711, + "args": { + "External id": 468373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454036619.110, "dur": 11.914, + "args": { + "External id": 468374,"Record function id": 0, "Sequence number": 5036741, "Fwd thread id": 1, "Ev Idx": 6549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454036621.254, "dur": 6.456, + "args": { + "External id": 468375,"Sequence number": 5036741, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6550 + } + }, + { + "ph": "f", "id": 302, "pid": 2338707, "tid": 2379456, "ts": 6267454036621.254, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454036623.977, "dur": 3.439, + "args": { + "External id": 468376,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454036625.196, "dur": 2.053, + "args": { + "External id": 468377,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454036638.001, "dur": 171.421, + "args": { + "External id": 468378,"Record function id": 0, "Sequence number": 5036740, "Fwd thread id": 1, "Ev Idx": 6553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454036639.277, "dur": 161.784, + "args": { + "External id": 468379,"Sequence number": 5036740, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6554 + } + }, + { + "ph": "f", "id": 303, "pid": 2338707, "tid": 2379456, "ts": 6267454036639.277, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454036643.326, "dur": 6.348, + "args": { + "External id": 468380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454036645.120, "dur": 3.817, + "args": { + "External id": 468381,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454036647.325, "dur": 1.360, + "args": { + "External id": 468382,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454036651.099, "dur": 87.422, + "args": { + "External id": 468383,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454036740.155, "dur": 8.652, + "args": { + "External id": 468384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454036741.044, "dur": 6.976, + "args": { + "External id": 468385,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454036745.154, "dur": 2.669, + "args": { + "External id": 468386,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454036750.650, "dur": 4.364, + "args": { + "External id": 468387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454036752.001, "dur": 2.412, + "args": { + "External id": 468388,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454036753.719, "dur": 0.601, + "args": { + "External id": 468389,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454036755.822, "dur": 44.219, + "args": { + "External id": 468390,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454036815.861, "dur": 8.250, + "args": { + "External id": 468391,"Record function id": 0, "Sequence number": 5036739, "Fwd thread id": 1, "Ev Idx": 6566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454036817.140, "dur": 4.692, + "args": { + "External id": 468392,"Sequence number": 5036739, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6567 + } + }, + { + "ph": "f", "id": 304, "pid": 2338707, "tid": 2379456, "ts": 6267454036817.140, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454036819.022, "dur": 2.619, + "args": { + "External id": 468393,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454036820.360, "dur": 1.128, + "args": { + "External id": 468394,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454036828.146, "dur": 10.921, + "args": { + "External id": 468395,"Record function id": 0, "Sequence number": 5036738, "Fwd thread id": 1, "Ev Idx": 6570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454036831.443, "dur": 5.886, + "args": { + "External id": 468396,"Sequence number": 5036738, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6571 + } + }, + { + "ph": "f", "id": 305, "pid": 2338707, "tid": 2379456, "ts": 6267454036831.443, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454036832.600, "dur": 4.493, + "args": { + "External id": 468397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454036833.635, "dur": 2.739, + "args": { + "External id": 468398,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454036835.404, "dur": 0.830, + "args": { + "External id": 468399,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454036844.377, "dur": 9.559, + "args": { + "External id": 468400,"Record function id": 0, "Ev Idx": 6575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454036846.032, "dur": 7.286, + "args": { + "External id": 468401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454036847.652, "dur": 5.253, + "args": { + "External id": 468402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454036848.663, "dur": 4.130, + "args": { + "External id": 468403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454036857.964, "dur": 9.788, + "args": { + "External id": 468404,"Record function id": 0, "Sequence number": 5036737, "Fwd thread id": 1, "Ev Idx": 6579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454036858.906, "dur": 6.948, + "args": { + "External id": 468405,"Sequence number": 5036737, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6580 + } + }, + { + "ph": "f", "id": 306, "pid": 2338707, "tid": 2379456, "ts": 6267454036858.906, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454036862.992, "dur": 2.687, + "args": { + "External id": 468406,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454036863.969, "dur": 1.517, + "args": { + "External id": 468407,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454036873.473, "dur": 447.141, + "args": { + "External id": 468408,"Record function id": 0, "Sequence number": 5036736, "Fwd thread id": 1, "Ev Idx": 6583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454036874.744, "dur": 424.151, + "args": { + "External id": 468409,"Sequence number": 5036736, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6584 + } + }, + { + "ph": "f", "id": 307, "pid": 2338707, "tid": 2379456, "ts": 6267454036874.744, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454036892.653, "dur": 9.143, + "args": { + "External id": 468410,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454036896.419, "dur": 4.887, + "args": { + "External id": 468411,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454036904.279, "dur": 3.870, + "args": { + "External id": 468412,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454036906.099, "dur": 1.849, + "args": { + "External id": 468413,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454036910.040, "dur": 4.948, + "args": { + "External id": 468414,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454036912.441, "dur": 2.342, + "args": { + "External id": 468415,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454036944.762, "dur": 323.577, + "args": { + "External id": 468416,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454037033.790, "dur": 3.829, + "args": { + "External id": 468417,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454037040.039, "dur": 2.817, + "args": { + "External id": 468418,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454037044.232, "dur": 2.621, + "args": { + "External id": 468419,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454037049.993, "dur": 4.904, + "args": { + "External id": 468420,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454037149.714, "dur": 3.860, + "args": { + "External id": 468421,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454037151.056, "dur": 2.114, + "args": { + "External id": 468422,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454037155.549, "dur": 35.176, + "args": { + "External id": 468423,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454037162.113, "dur": 2.709, + "args": { + "External id": 468424,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454037192.121, "dur": 2.248, + "args": { + "External id": 468425,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454037193.494, "dur": 0.775, + "args": { + "External id": 468426,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454037195.220, "dur": 17.987, + "args": { + "External id": 468427,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454037197.755, "dur": 0.452, + "args": { + "External id": 468428,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454037283.320, "dur": 4.966, + "args": { + "External id": 468429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454037291.924, "dur": 0.760, + "args": { + "External id": 468430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454037295.092, "dur": 0.616, + "args": { + "External id": 468431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454037333.295, "dur": 290.619, + "args": { + "External id": 468432,"Record function id": 0, "Sequence number": 5036735, "Fwd thread id": 1, "Ev Idx": 6607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454037335.003, "dur": 255.336, + "args": { + "External id": 468433,"Sequence number": 5036735, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6608 + } + }, + { + "ph": "f", "id": 308, "pid": 2338707, "tid": 2379456, "ts": 6267454037335.003, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267454037359.554, "dur": 51.087, + "args": { + "External id": 468434,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454037363.721, "dur": 4.570, + "args": { + "External id": 468435,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454037369.941, "dur": 39.800, + "args": { + "External id": 468436,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454037422.800, "dur": 5.460, + "args": { + "External id": 468437,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454037425.213, "dur": 2.670, + "args": { + "External id": 468438,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454037636.686, "dur": 217.555, + "args": { + "External id": 468439,"Record function id": 0, "Sequence number": 5036734, "Fwd thread id": 1, "Ev Idx": 6614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454037639.079, "dur": 207.074, + "args": { + "External id": 468440,"Sequence number": 5036734, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6615 + } + }, + { + "ph": "f", "id": 309, "pid": 2338707, "tid": 2379456, "ts": 6267454037639.079, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267454037655.486, "dur": 64.024, + "args": { + "External id": 468441,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454037659.849, "dur": 7.433, + "args": { + "External id": 468442,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454037668.465, "dur": 50.250, + "args": { + "External id": 468443,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454037728.707, "dur": 6.108, + "args": { + "External id": 468444,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454037731.648, "dur": 2.799, + "args": { + "External id": 468445,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454037861.798, "dur": 15.353, + "args": { + "External id": 468446,"Record function id": 0, "Sequence number": 5036733, "Fwd thread id": 1, "Ev Idx": 6621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454037863.679, "dur": 10.623, + "args": { + "External id": 468447,"Sequence number": 5036733, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6622 + } + }, + { + "ph": "f", "id": 310, "pid": 2338707, "tid": 2379456, "ts": 6267454037863.679, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454037866.667, "dur": 7.131, + "args": { + "External id": 468448,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454037868.311, "dur": 5.267, + "args": { + "External id": 468449,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454037881.218, "dur": 8.018, + "args": { + "External id": 468450,"Record function id": 0, "Sequence number": 5036732, "Fwd thread id": 1, "Ev Idx": 6625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454037882.481, "dur": 4.615, + "args": { + "External id": 468451,"Sequence number": 5036732, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6626 + } + }, + { + "ph": "f", "id": 311, "pid": 2338707, "tid": 2379456, "ts": 6267454037882.481, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454037883.964, "dur": 2.918, + "args": { + "External id": 468452,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454037885.535, "dur": 1.231, + "args": { + "External id": 468453,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454037893.002, "dur": 9.889, + "args": { + "External id": 468454,"Record function id": 0, "Sequence number": 5036731, "Fwd thread id": 1, "Ev Idx": 6629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454037894.094, "dur": 6.520, + "args": { + "External id": 468455,"Sequence number": 5036731, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6630 + } + }, + { + "ph": "f", "id": 312, "pid": 2338707, "tid": 2379456, "ts": 6267454037894.094, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454037898.149, "dur": 2.285, + "args": { + "External id": 468456,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454037899.327, "dur": 0.995, + "args": { + "External id": 468457,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454037907.695, "dur": 9.558, + "args": { + "External id": 468458,"Record function id": 0, "Sequence number": 5036730, "Fwd thread id": 1, "Ev Idx": 6633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454037909.055, "dur": 6.121, + "args": { + "External id": 468459,"Sequence number": 5036730, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6634 + } + }, + { + "ph": "f", "id": 313, "pid": 2338707, "tid": 2379456, "ts": 6267454037909.055, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454037910.128, "dur": 4.863, + "args": { + "External id": 468460,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454037911.122, "dur": 3.770, + "args": { + "External id": 468461,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454037921.277, "dur": 240.473, + "args": { + "External id": 468462,"Record function id": 0, "Sequence number": 5036729, "Fwd thread id": 1, "Ev Idx": 6637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454037922.559, "dur": 229.272, + "args": { + "External id": 468463,"Sequence number": 5036729, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6638 + } + }, + { + "ph": "f", "id": 314, "pid": 2338707, "tid": 2379456, "ts": 6267454037922.559, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454037927.190, "dur": 10.525, + "args": { + "External id": 468464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454037932.176, "dur": 4.699, + "args": { + "External id": 468465,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454037934.733, "dur": 1.829, + "args": { + "External id": 468466,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454037939.620, "dur": 87.120, + "args": { + "External id": 468467,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454038028.650, "dur": 10.618, + "args": { + "External id": 468468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454038029.578, "dur": 8.693, + "args": { + "External id": 468469,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454038035.807, "dur": 2.173, + "args": { + "External id": 468470,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454038043.399, "dur": 3.954, + "args": { + "External id": 468471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454038044.817, "dur": 1.709, + "args": { + "External id": 468472,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454038046.044, "dur": 0.402, + "args": { + "External id": 468473,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454038048.097, "dur": 101.911, + "args": { + "External id": 468474,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038171.989, "dur": 10.401, + "args": { + "External id": 468475,"Record function id": 0, "Sequence number": 5036728, "Fwd thread id": 1, "Ev Idx": 6650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038173.491, "dur": 6.481, + "args": { + "External id": 468476,"Sequence number": 5036728, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6651 + } + }, + { + "ph": "f", "id": 315, "pid": 2338707, "tid": 2379456, "ts": 6267454038173.491, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454038175.937, "dur": 3.838, + "args": { + "External id": 468477,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454038177.394, "dur": 2.228, + "args": { + "External id": 468478,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038189.530, "dur": 66.046, + "args": { + "External id": 468479,"Record function id": 0, "Sequence number": 5036727, "Fwd thread id": 1, "Ev Idx": 6654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038190.518, "dur": 62.225, + "args": { + "External id": 468480,"Sequence number": 5036727, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6655 + } + }, + { + "ph": "f", "id": 316, "pid": 2338707, "tid": 2379456, "ts": 6267454038190.518, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454038243.192, "dur": 9.291, + "args": { + "External id": 468481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454038244.429, "dur": 7.161, + "args": { + "External id": 468482,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454038250.611, "dur": 0.838, + "args": { + "External id": 468483,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454038263.019, "dur": 16.500, + "args": { + "External id": 468484,"Record function id": 0, "Ev Idx": 6659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454038265.004, "dur": 13.571, + "args": { + "External id": 468485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454038268.225, "dur": 9.806, + "args": { + "External id": 468486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454038272.724, "dur": 5.151, + "args": { + "External id": 468487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038283.265, "dur": 7.335, + "args": { + "External id": 468488,"Record function id": 0, "Sequence number": 5036726, "Fwd thread id": 1, "Ev Idx": 6663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038284.601, "dur": 3.495, + "args": { + "External id": 468489,"Sequence number": 5036726, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6664 + } + }, + { + "ph": "f", "id": 317, "pid": 2338707, "tid": 2379456, "ts": 6267454038284.601, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454038285.720, "dur": 2.199, + "args": { + "External id": 468490,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454038286.639, "dur": 1.119, + "args": { + "External id": 468491,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038294.345, "dur": 118.725, + "args": { + "External id": 468492,"Record function id": 0, "Sequence number": 5036725, "Fwd thread id": 1, "Ev Idx": 6667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038295.170, "dur": 109.820, + "args": { + "External id": 468493,"Sequence number": 5036725, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6668 + } + }, + { + "ph": "f", "id": 318, "pid": 2338707, "tid": 2379456, "ts": 6267454038295.170, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454038297.665, "dur": 5.394, + "args": { + "External id": 468494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454038298.166, "dur": 4.379, + "args": { + "External id": 468495,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454038301.759, "dur": 0.646, + "args": { + "External id": 468496,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454038304.084, "dur": 43.153, + "args": { + "External id": 468497,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454038348.582, "dur": 4.031, + "args": { + "External id": 468498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454038349.155, "dur": 2.869, + "args": { + "External id": 468499,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454038351.080, "dur": 0.822, + "args": { + "External id": 468500,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454038353.845, "dur": 5.875, + "args": { + "External id": 468501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454038357.212, "dur": 1.916, + "args": { + "External id": 468502,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454038358.445, "dur": 0.604, + "args": { + "External id": 468503,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454038360.199, "dur": 43.750, + "args": { + "External id": 468504,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038418.614, "dur": 42.619, + "args": { + "External id": 468505,"Record function id": 0, "Sequence number": 5036724, "Fwd thread id": 1, "Ev Idx": 6680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038419.517, "dur": 4.971, + "args": { + "External id": 468506,"Sequence number": 5036724, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6681 + } + }, + { + "ph": "f", "id": 319, "pid": 2338707, "tid": 2379456, "ts": 6267454038419.517, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454038421.440, "dur": 2.877, + "args": { + "External id": 468507,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454038422.866, "dur": 1.298, + "args": { + "External id": 468508,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267454038428.155, "dur": 30.122, + "args": { + "External id": 468509,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038466.215, "dur": 10.599, + "args": { + "External id": 468510,"Record function id": 0, "Sequence number": 5036723, "Fwd thread id": 1, "Ev Idx": 6685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038467.124, "dur": 7.680, + "args": { + "External id": 468511,"Sequence number": 5036723, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6686 + } + }, + { + "ph": "f", "id": 320, "pid": 2338707, "tid": 2379456, "ts": 6267454038467.124, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454038471.090, "dur": 3.481, + "args": { + "External id": 468512,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454038471.944, "dur": 1.967, + "args": { + "External id": 468513,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454038473.380, "dur": 0.404, + "args": { + "External id": 468514,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454038482.024, "dur": 5.649, + "args": { + "External id": 468515,"Record function id": 0, "Ev Idx": 6690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454038483.393, "dur": 3.707, + "args": { + "External id": 468516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454038484.689, "dur": 2.039, + "args": { + "External id": 468517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454038485.224, "dur": 1.369, + "args": { + "External id": 468518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038491.264, "dur": 10.471, + "args": { + "External id": 468519,"Record function id": 0, "Sequence number": 5036722, "Fwd thread id": 1, "Ev Idx": 6694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038492.639, "dur": 6.192, + "args": { + "External id": 468520,"Sequence number": 5036722, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6695 + } + }, + { + "ph": "f", "id": 321, "pid": 2338707, "tid": 2379456, "ts": 6267454038492.639, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454038494.007, "dur": 4.626, + "args": { + "External id": 468521,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454038497.485, "dur": 1.030, + "args": { + "External id": 468522,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038505.360, "dur": 134.755, + "args": { + "External id": 468523,"Record function id": 0, "Sequence number": 5036721, "Fwd thread id": 1, "Ev Idx": 6698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038506.299, "dur": 123.419, + "args": { + "External id": 468524,"Sequence number": 5036721, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6699 + } + }, + { + "ph": "f", "id": 322, "pid": 2338707, "tid": 2379456, "ts": 6267454038506.299, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454038508.276, "dur": 3.119, + "args": { + "External id": 468525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454038508.710, "dur": 2.046, + "args": { + "External id": 468526,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454038510.084, "dur": 0.547, + "args": { + "External id": 468527,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454038512.064, "dur": 41.448, + "args": { + "External id": 468528,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454038557.379, "dur": 3.966, + "args": { + "External id": 468529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454038557.964, "dur": 2.649, + "args": { + "External id": 468530,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454038559.830, "dur": 0.641, + "args": { + "External id": 468531,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454038562.624, "dur": 3.426, + "args": { + "External id": 468532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454038563.802, "dur": 1.570, + "args": { + "External id": 468533,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454038564.944, "dur": 0.344, + "args": { + "External id": 468534,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454038569.201, "dur": 58.870, + "args": { + "External id": 468535,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038648.645, "dur": 31.926, + "args": { + "External id": 468536,"Record function id": 0, "Sequence number": 5036720, "Fwd thread id": 1, "Ev Idx": 6711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038650.046, "dur": 4.860, + "args": { + "External id": 468537,"Sequence number": 5036720, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6712 + } + }, + { + "ph": "f", "id": 323, "pid": 2338707, "tid": 2379456, "ts": 6267454038650.046, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454038651.638, "dur": 3.051, + "args": { + "External id": 468538,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454038652.834, "dur": 1.713, + "args": { + "External id": 468539,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454038657.889, "dur": 20.149, + "args": { + "External id": 468540,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038685.286, "dur": 10.524, + "args": { + "External id": 468541,"Record function id": 0, "Sequence number": 5036719, "Fwd thread id": 1, "Ev Idx": 6716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454038686.229, "dur": 7.529, + "args": { + "External id": 468542,"Sequence number": 5036719, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6717 + } + }, + { + "ph": "f", "id": 324, "pid": 2338707, "tid": 2379456, "ts": 6267454038686.229, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454038687.259, "dur": 6.266, + "args": { + "External id": 468543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454038688.023, "dur": 4.748, + "args": { + "External id": 468544,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454038692.210, "dur": 0.440, + "args": { + "External id": 468545,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454038700.875, "dur": 6.349, + "args": { + "External id": 468546,"Record function id": 0, "Ev Idx": 6721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454038702.298, "dur": 4.332, + "args": { + "External id": 468547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454038703.851, "dur": 2.268, + "args": { + "External id": 468548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454038704.504, "dur": 1.515, + "args": { + "External id": 468549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454038712.197, "dur": 481.711, + "args": { + "External id": 468550,"Record function id": 0, "Sequence number": 5036718, "Fwd thread id": 1, "Ev Idx": 6725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454038713.928, "dur": 443.400, + "args": { + "External id": 468551,"Sequence number": 5036718, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6726 + } + }, + { + "ph": "f", "id": 325, "pid": 2338707, "tid": 2379456, "ts": 6267454038713.928, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454038757.140, "dur": 2.403, + "args": { + "External id": 468552,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454038758.217, "dur": 1.177, + "args": { + "External id": 468553,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454038777.574, "dur": 5.369, + "args": { + "External id": 468554,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454038797.717, "dur": 2.613, + "args": { + "External id": 468555,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454038985.752, "dur": 2.047, + "args": { + "External id": 468556,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454038993.056, "dur": 43.315, + "args": { + "External id": 468557,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039008.159, "dur": 0.846, + "args": { + "External id": 468558,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454039042.670, "dur": 80.771, + "args": { + "External id": 468559,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454039044.831, "dur": 78.234, + "args": { + "External id": 468560,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039049.957, "dur": 5.467, + "args": { + "External id": 468561,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454039097.772, "dur": 24.362, + "args": { + "External id": 468562,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267454039133.290, "dur": 3.581, + "args": { + "External id": 468563,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454039134.911, "dur": 1.845, + "args": { + "External id": 468564,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454039146.173, "dur": 2.252, + "args": { + "External id": 468565,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454039147.247, "dur": 1.056, + "args": { + "External id": 468566,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454039169.490, "dur": 19.960, + "args": { + "External id": 468567,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454039208.469, "dur": 12.691, + "args": { + "External id": 468568,"Record function id": 0, "Ev Idx": 6743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454039210.926, "dur": 9.456, + "args": { + "External id": 468569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454039213.230, "dur": 5.963, + "args": { + "External id": 468570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454039216.831, "dur": 2.248, + "args": { + "External id": 468571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039225.431, "dur": 6.522, + "args": { + "External id": 468572,"Record function id": 0, "Sequence number": 5036717, "Fwd thread id": 1, "Ev Idx": 6747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039227.140, "dur": 1.567, + "args": { + "External id": 468573,"Sequence number": 5036717, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6748 + } + }, + { + "ph": "f", "id": 326, "pid": 2338707, "tid": 2379456, "ts": 6267454039227.140, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454039236.235, "dur": 507.399, + "args": { + "External id": 468574,"Record function id": 0, "Sequence number": 5036716, "Fwd thread id": 1, "Ev Idx": 6749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454039240.487, "dur": 489.818, + "args": { + "External id": 468575,"Sequence number": 5036716, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6750 + } + }, + { + "ph": "f", "id": 327, "pid": 2338707, "tid": 2379456, "ts": 6267454039240.487, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454039274.104, "dur": 10.154, + "args": { + "External id": 468576,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454039280.151, "dur": 3.788, + "args": { + "External id": 468577,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454039290.997, "dur": 5.284, + "args": { + "External id": 468578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454039292.625, "dur": 2.980, + "args": { + "External id": 468579,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039294.629, "dur": 0.782, + "args": { + "External id": 468580,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2379456, + "ts": 6267454039300.167, "dur": 106.721, + "args": { + "External id": 468581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454039301.017, "dur": 5.926, + "args": { + "External id": 468582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454039301.540, "dur": 4.853, + "args": { + "External id": 468583,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039305.944, "dur": 0.353, + "args": { + "External id": 468584,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2379456, + "ts": 6267454039308.135, "dur": 98.105, + "args": { + "External id": 468585,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454039309.590, "dur": 95.961, + "args": { + "External id": 468586,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267454039411.547, "dur": 3.268, + "args": { + "External id": 468587,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454039413.087, "dur": 1.570, + "args": { + "External id": 468588,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454039451.195, "dur": 4.972, + "args": { + "External id": 468589,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454039457.740, "dur": 5.987, + "args": { + "External id": 468590,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454039467.456, "dur": 2.900, + "args": { + "External id": 468591,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454039509.160, "dur": 2.609, + "args": { + "External id": 468592,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454039510.313, "dur": 1.155, + "args": { + "External id": 468593,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338707, "tid": 2379456, + "ts": 6267454039537.664, "dur": 169.493, + "args": { + "External id": 468594,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267454039543.578, "dur": 6.198, + "args": { + "External id": 468595,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039547.836, "dur": 0.846, + "args": { + "External id": 468596,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454039551.367, "dur": 6.114, + "args": { + "External id": 468597,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039555.979, "dur": 0.510, + "args": { + "External id": 468598,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267454039558.972, "dur": 3.669, + "args": { + "External id": 468599,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039561.657, "dur": 0.523, + "args": { + "External id": 468600,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454039566.110, "dur": 3.305, + "args": { + "External id": 468601,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039568.274, "dur": 0.574, + "args": { + "External id": 468602,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454039573.544, "dur": 3.529, + "args": { + "External id": 468603,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039575.888, "dur": 0.814, + "args": { + "External id": 468604,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454039578.616, "dur": 8.109, + "args": { + "External id": 468605,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454039582.330, "dur": 4.189, + "args": { + "External id": 468606,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454039587.754, "dur": 3.140, + "args": { + "External id": 468607,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039590.100, "dur": 0.399, + "args": { + "External id": 468608,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454039611.811, "dur": 4.032, + "args": { + "External id": 468609,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454039613.459, "dur": 2.093, + "args": { + "External id": 468610,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454039620.149, "dur": 72.234, + "args": { + "External id": 468611,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454039694.808, "dur": 1.153, + "args": { + "External id": 468612,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454039697.426, "dur": 4.438, + "args": { + "External id": 468613,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039700.141, "dur": 0.526, + "args": { + "External id": 468614,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454039704.637, "dur": 1.140, + "args": { + "External id": 468615,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454039757.326, "dur": 10.929, + "args": { + "External id": 468616,"Record function id": 0, "Ev Idx": 6791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454039759.807, "dur": 7.608, + "args": { + "External id": 468617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454039762.528, "dur": 3.679, + "args": { + "External id": 468618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454039763.706, "dur": 2.318, + "args": { + "External id": 468619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039773.066, "dur": 10.205, + "args": { + "External id": 468620,"Record function id": 0, "Sequence number": 5036715, "Fwd thread id": 1, "Ev Idx": 6795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039774.742, "dur": 6.503, + "args": { + "External id": 468621,"Sequence number": 5036715, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6796 + } + }, + { + "ph": "f", "id": 328, "pid": 2338707, "tid": 2379456, "ts": 6267454039774.742, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454039778.965, "dur": 2.015, + "args": { + "External id": 468622,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454039779.853, "dur": 0.982, + "args": { + "External id": 468623,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039787.564, "dur": 125.196, + "args": { + "External id": 468624,"Record function id": 0, "Sequence number": 5036714, "Fwd thread id": 1, "Ev Idx": 6799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039788.541, "dur": 115.799, + "args": { + "External id": 468625,"Sequence number": 5036714, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6800 + } + }, + { + "ph": "f", "id": 329, "pid": 2338707, "tid": 2379456, "ts": 6267454039788.541, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454039792.167, "dur": 9.419, + "args": { + "External id": 468626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454039793.719, "dur": 7.273, + "args": { + "External id": 468627,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039797.495, "dur": 3.328, + "args": { + "External id": 468628,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454039802.904, "dur": 47.074, + "args": { + "External id": 468629,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454039851.334, "dur": 5.006, + "args": { + "External id": 468630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454039851.936, "dur": 3.629, + "args": { + "External id": 468631,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039853.854, "dur": 1.529, + "args": { + "External id": 468632,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454039858.105, "dur": 6.240, + "args": { + "External id": 468633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454039859.027, "dur": 4.691, + "args": { + "External id": 468634,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039863.300, "dur": 0.288, + "args": { + "External id": 468635,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454039864.885, "dur": 38.468, + "args": { + "External id": 468636,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039918.495, "dur": 8.113, + "args": { + "External id": 468637,"Record function id": 0, "Sequence number": 5036713, "Fwd thread id": 1, "Ev Idx": 6812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039919.735, "dur": 5.312, + "args": { + "External id": 468638,"Sequence number": 5036713, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6813 + } + }, + { + "ph": "f", "id": 330, "pid": 2338707, "tid": 2379456, "ts": 6267454039919.735, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454039921.858, "dur": 2.987, + "args": { + "External id": 468639,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454039923.416, "dur": 1.276, + "args": { + "External id": 468640,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039930.509, "dur": 10.300, + "args": { + "External id": 468641,"Record function id": 0, "Sequence number": 5036712, "Fwd thread id": 1, "Ev Idx": 6816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039931.435, "dur": 7.325, + "args": { + "External id": 468642,"Sequence number": 5036712, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6817 + } + }, + { + "ph": "f", "id": 331, "pid": 2338707, "tid": 2379456, "ts": 6267454039931.435, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454039932.344, "dur": 6.167, + "args": { + "External id": 468643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454039933.204, "dur": 4.684, + "args": { + "External id": 468644,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039937.177, "dur": 0.605, + "args": { + "External id": 468645,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454039945.362, "dur": 8.451, + "args": { + "External id": 468646,"Record function id": 0, "Ev Idx": 6821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454039947.040, "dur": 6.164, + "args": { + "External id": 468647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454039948.248, "dur": 4.676, + "args": { + "External id": 468648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454039948.963, "dur": 3.877, + "args": { + "External id": 468649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039957.740, "dur": 6.937, + "args": { + "External id": 468650,"Record function id": 0, "Sequence number": 5036711, "Fwd thread id": 1, "Ev Idx": 6825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039959.213, "dur": 3.251, + "args": { + "External id": 468651,"Sequence number": 5036711, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6826 + } + }, + { + "ph": "f", "id": 332, "pid": 2338707, "tid": 2379456, "ts": 6267454039959.213, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454039960.440, "dur": 1.839, + "args": { + "External id": 468652,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454039961.284, "dur": 0.862, + "args": { + "External id": 468653,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039968.663, "dur": 148.406, + "args": { + "External id": 468654,"Record function id": 0, "Sequence number": 5036710, "Fwd thread id": 1, "Ev Idx": 6829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454039971.630, "dur": 133.900, + "args": { + "External id": 468655,"Sequence number": 5036710, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6830 + } + }, + { + "ph": "f", "id": 333, "pid": 2338707, "tid": 2379456, "ts": 6267454039971.630, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454039974.005, "dur": 3.030, + "args": { + "External id": 468656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454039974.589, "dur": 1.939, + "args": { + "External id": 468657,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454039975.830, "dur": 0.583, + "args": { + "External id": 468658,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454039977.792, "dur": 36.129, + "args": { + "External id": 468659,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454040015.343, "dur": 6.515, + "args": { + "External id": 468660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454040015.840, "dur": 5.339, + "args": { + "External id": 468661,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454040019.838, "dur": 1.179, + "args": { + "External id": 468662,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454040022.948, "dur": 3.980, + "args": { + "External id": 468663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454040024.550, "dur": 1.832, + "args": { + "External id": 468664,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454040025.815, "dur": 0.464, + "args": { + "External id": 468665,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454040027.431, "dur": 76.354, + "args": { + "External id": 468666,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454040126.122, "dur": 44.056, + "args": { + "External id": 468667,"Record function id": 0, "Sequence number": 5036709, "Fwd thread id": 1, "Ev Idx": 6842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454040127.681, "dur": 8.780, + "args": { + "External id": 468668,"Sequence number": 5036709, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6843 + } + }, + { + "ph": "f", "id": 334, "pid": 2338707, "tid": 2379456, "ts": 6267454040127.681, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454040132.757, "dur": 3.516, + "args": { + "External id": 468669,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454040133.969, "dur": 2.188, + "args": { + "External id": 468670,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267454040139.903, "dur": 26.869, + "args": { + "External id": 468671,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454040174.850, "dur": 9.619, + "args": { + "External id": 468672,"Record function id": 0, "Sequence number": 5036708, "Fwd thread id": 1, "Ev Idx": 6847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454040176.050, "dur": 6.069, + "args": { + "External id": 468673,"Sequence number": 5036708, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6848 + } + }, + { + "ph": "f", "id": 335, "pid": 2338707, "tid": 2379456, "ts": 6267454040176.050, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454040177.040, "dur": 4.794, + "args": { + "External id": 468674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454040178.252, "dur": 2.878, + "args": { + "External id": 468675,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454040180.356, "dur": 0.622, + "args": { + "External id": 468676,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454040189.696, "dur": 9.774, + "args": { + "External id": 468677,"Record function id": 0, "Ev Idx": 6852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454040191.470, "dur": 7.339, + "args": { + "External id": 468678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454040193.307, "dur": 5.101, + "args": { + "External id": 468679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454040196.609, "dur": 1.681, + "args": { + "External id": 468680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454040204.382, "dur": 509.957, + "args": { + "External id": 468681,"Record function id": 0, "Sequence number": 5036707, "Fwd thread id": 1, "Ev Idx": 6856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454040206.384, "dur": 469.644, + "args": { + "External id": 468682,"Sequence number": 5036707, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6857 + } + }, + { + "ph": "f", "id": 336, "pid": 2338707, "tid": 2379456, "ts": 6267454040206.384, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2379456, + "ts": 6267454040233.969, "dur": 44.739, + "args": { + "External id": 468683,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267454040235.923, "dur": 42.533, + "args": { + "External id": 468684,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454040239.529, "dur": 12.490, + "args": { + "External id": 468685,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454040247.101, "dur": 4.199, + "args": { + "External id": 468686,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454040253.728, "dur": 24.077, + "args": { + "External id": 468687,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454040292.445, "dur": 3.252, + "args": { + "External id": 468688,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454040293.836, "dur": 1.693, + "args": { + "External id": 468689,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454040302.680, "dur": 2.405, + "args": { + "External id": 468690,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454040304.050, "dur": 0.933, + "args": { + "External id": 468691,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454040319.856, "dur": 2.831, + "args": { + "External id": 468692,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454040335.237, "dur": 2.878, + "args": { + "External id": 468693,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454040524.840, "dur": 4.683, + "args": { + "External id": 468694,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454040535.081, "dur": 36.222, + "args": { + "External id": 468695,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454040546.542, "dur": 1.145, + "args": { + "External id": 468696,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454040577.365, "dur": 56.167, + "args": { + "External id": 468697,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454040579.335, "dur": 53.838, + "args": { + "External id": 468698,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454040584.220, "dur": 5.640, + "args": { + "External id": 468699,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454040610.505, "dur": 21.758, + "args": { + "External id": 468700,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267454040640.509, "dur": 6.476, + "args": { + "External id": 468701,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454040644.761, "dur": 2.061, + "args": { + "External id": 468702,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454040656.006, "dur": 3.861, + "args": { + "External id": 468703,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454040657.859, "dur": 1.841, + "args": { + "External id": 468704,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454040662.896, "dur": 2.497, + "args": { + "External id": 468705,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454040664.228, "dur": 1.052, + "args": { + "External id": 468706,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454040693.155, "dur": 19.118, + "args": { + "External id": 468707,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454040730.031, "dur": 12.902, + "args": { + "External id": 468708,"Record function id": 0, "Ev Idx": 6883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454040732.390, "dur": 9.716, + "args": { + "External id": 468709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454040734.942, "dur": 6.078, + "args": { + "External id": 468710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454040738.676, "dur": 2.216, + "args": { + "External id": 468711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454040747.623, "dur": 8.841, + "args": { + "External id": 468712,"Record function id": 0, "Sequence number": 5036706, "Fwd thread id": 1, "Ev Idx": 6887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454040748.795, "dur": 5.055, + "args": { + "External id": 468713,"Sequence number": 5036706, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6888 + } + }, + { + "ph": "f", "id": 337, "pid": 2338707, "tid": 2379456, "ts": 6267454040748.795, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454040751.610, "dur": 2.010, + "args": { + "External id": 468714,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454040752.370, "dur": 1.055, + "args": { + "External id": 468715,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454040760.922, "dur": 201.223, + "args": { + "External id": 468716,"Record function id": 0, "Sequence number": 5036705, "Fwd thread id": 1, "Ev Idx": 6891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454040762.107, "dur": 192.681, + "args": { + "External id": 468717,"Sequence number": 5036705, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6892 + } + }, + { + "ph": "f", "id": 338, "pid": 2338707, "tid": 2379456, "ts": 6267454040762.107, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454040766.086, "dur": 8.137, + "args": { + "External id": 468718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454040767.488, "dur": 6.044, + "args": { + "External id": 468719,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454040772.131, "dur": 1.200, + "args": { + "External id": 468720,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454040775.675, "dur": 100.268, + "args": { + "External id": 468721,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454040877.565, "dur": 5.975, + "args": { + "External id": 468722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454040878.336, "dur": 4.518, + "args": { + "External id": 468723,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454040880.144, "dur": 2.494, + "args": { + "External id": 468724,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454040885.231, "dur": 8.587, + "args": { + "External id": 468725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454040888.879, "dur": 4.356, + "args": { + "External id": 468726,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454040890.254, "dur": 2.873, + "args": { + "External id": 468727,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454040894.460, "dur": 59.246, + "args": { + "External id": 468728,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454040967.889, "dur": 8.068, + "args": { + "External id": 468729,"Record function id": 0, "Sequence number": 5036704, "Fwd thread id": 1, "Ev Idx": 6904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454040969.116, "dur": 4.711, + "args": { + "External id": 468730,"Sequence number": 5036704, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6905 + } + }, + { + "ph": "f", "id": 339, "pid": 2338707, "tid": 2379456, "ts": 6267454040969.116, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454040970.890, "dur": 2.758, + "args": { + "External id": 468731,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454040972.196, "dur": 1.301, + "args": { + "External id": 468732,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454040980.164, "dur": 9.841, + "args": { + "External id": 468733,"Record function id": 0, "Sequence number": 5036703, "Fwd thread id": 1, "Ev Idx": 6908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454040980.974, "dur": 7.086, + "args": { + "External id": 468734,"Sequence number": 5036703, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6909 + } + }, + { + "ph": "f", "id": 340, "pid": 2338707, "tid": 2379456, "ts": 6267454040980.974, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454040981.682, "dur": 6.109, + "args": { + "External id": 468735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454040984.824, "dur": 2.382, + "args": { + "External id": 468736,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454040986.241, "dur": 0.858, + "args": { + "External id": 468737,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454040994.606, "dur": 5.954, + "args": { + "External id": 468738,"Record function id": 0, "Ev Idx": 6913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454040996.055, "dur": 3.918, + "args": { + "External id": 468739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454040997.480, "dur": 2.194, + "args": { + "External id": 468740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454040998.287, "dur": 1.282, + "args": { + "External id": 468741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454041004.685, "dur": 9.076, + "args": { + "External id": 468742,"Record function id": 0, "Sequence number": 5036702, "Fwd thread id": 1, "Ev Idx": 6917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454041005.791, "dur": 5.422, + "args": { + "External id": 468743,"Sequence number": 5036702, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6918 + } + }, + { + "ph": "f", "id": 341, "pid": 2338707, "tid": 2379456, "ts": 6267454041005.791, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454041007.716, "dur": 3.311, + "args": { + "External id": 468744,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454041009.271, "dur": 1.593, + "args": { + "External id": 468745,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454041018.903, "dur": 488.398, + "args": { + "External id": 468746,"Record function id": 0, "Sequence number": 5036701, "Fwd thread id": 1, "Ev Idx": 6921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454041020.720, "dur": 464.912, + "args": { + "External id": 468747,"Sequence number": 5036701, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6922 + } + }, + { + "ph": "f", "id": 342, "pid": 2338707, "tid": 2379456, "ts": 6267454041020.720, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454041037.516, "dur": 8.143, + "args": { + "External id": 468748,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454041040.983, "dur": 4.208, + "args": { + "External id": 468749,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454041048.206, "dur": 4.592, + "args": { + "External id": 468750,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454041050.465, "dur": 2.102, + "args": { + "External id": 468751,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454041054.818, "dur": 50.062, + "args": { + "External id": 468752,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454041097.760, "dur": 6.641, + "args": { + "External id": 468753,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454041135.047, "dur": 320.884, + "args": { + "External id": 468754,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454041230.301, "dur": 4.104, + "args": { + "External id": 468755,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454041236.672, "dur": 2.644, + "args": { + "External id": 468756,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454041240.582, "dur": 2.933, + "args": { + "External id": 468757,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454041244.754, "dur": 2.219, + "args": { + "External id": 468758,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454041333.805, "dur": 3.452, + "args": { + "External id": 468759,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454041335.405, "dur": 1.751, + "args": { + "External id": 468760,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454041341.619, "dur": 33.885, + "args": { + "External id": 468761,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454041348.711, "dur": 2.460, + "args": { + "External id": 468762,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454041377.502, "dur": 2.248, + "args": { + "External id": 468763,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454041379.040, "dur": 0.631, + "args": { + "External id": 468764,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454041380.574, "dur": 24.425, + "args": { + "External id": 468765,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454041383.185, "dur": 0.695, + "args": { + "External id": 468766,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454041470.773, "dur": 4.540, + "args": { + "External id": 468767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454041479.000, "dur": 0.619, + "args": { + "External id": 468768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454041481.905, "dur": 0.648, + "args": { + "External id": 468769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454041519.893, "dur": 305.075, + "args": { + "External id": 468770,"Record function id": 0, "Sequence number": 5036700, "Fwd thread id": 1, "Ev Idx": 6945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454041521.790, "dur": 292.884, + "args": { + "External id": 468771,"Sequence number": 5036700, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6946 + } + }, + { + "ph": "f", "id": 343, "pid": 2338707, "tid": 2379456, "ts": 6267454041521.790, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267454041544.993, "dur": 76.440, + "args": { + "External id": 468772,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454041548.953, "dur": 6.140, + "args": { + "External id": 468773,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454041556.840, "dur": 63.441, + "args": { + "External id": 468774,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454041635.887, "dur": 7.711, + "args": { + "External id": 468775,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454041639.367, "dur": 3.799, + "args": { + "External id": 468776,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454041836.305, "dur": 202.234, + "args": { + "External id": 468777,"Record function id": 0, "Sequence number": 5036699, "Fwd thread id": 1, "Ev Idx": 6952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454041838.398, "dur": 192.278, + "args": { + "External id": 468778,"Sequence number": 5036699, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6953 + } + }, + { + "ph": "f", "id": 344, "pid": 2338707, "tid": 2379456, "ts": 6267454041838.398, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267454041852.850, "dur": 58.456, + "args": { + "External id": 468779,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454041861.291, "dur": 3.752, + "args": { + "External id": 468780,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454041869.334, "dur": 41.194, + "args": { + "External id": 468781,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454041919.999, "dur": 6.179, + "args": { + "External id": 468782,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454041922.528, "dur": 3.275, + "args": { + "External id": 468783,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042046.789, "dur": 64.369, + "args": { + "External id": 468784,"Record function id": 0, "Sequence number": 5036698, "Fwd thread id": 1, "Ev Idx": 6959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042050.628, "dur": 55.272, + "args": { + "External id": 468785,"Sequence number": 5036698, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6960 + } + }, + { + "ph": "f", "id": 345, "pid": 2338707, "tid": 2379456, "ts": 6267454042050.628, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454042053.626, "dur": 51.914, + "args": { + "External id": 468786,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454042055.186, "dur": 49.536, + "args": { + "External id": 468787,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042118.712, "dur": 8.512, + "args": { + "External id": 468788,"Record function id": 0, "Sequence number": 5036697, "Fwd thread id": 1, "Ev Idx": 6963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042120.066, "dur": 5.145, + "args": { + "External id": 468789,"Sequence number": 5036697, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6964 + } + }, + { + "ph": "f", "id": 346, "pid": 2338707, "tid": 2379456, "ts": 6267454042120.066, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454042122.248, "dur": 2.773, + "args": { + "External id": 468790,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454042123.620, "dur": 1.254, + "args": { + "External id": 468791,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042131.156, "dur": 9.634, + "args": { + "External id": 468792,"Record function id": 0, "Sequence number": 5036696, "Fwd thread id": 1, "Ev Idx": 6967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042132.141, "dur": 6.637, + "args": { + "External id": 468793,"Sequence number": 5036696, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6968 + } + }, + { + "ph": "f", "id": 347, "pid": 2338707, "tid": 2379456, "ts": 6267454042132.141, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454042133.544, "dur": 5.047, + "args": { + "External id": 468794,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454042137.606, "dur": 0.871, + "args": { + "External id": 468795,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042145.374, "dur": 6.919, + "args": { + "External id": 468796,"Record function id": 0, "Sequence number": 5036695, "Fwd thread id": 1, "Ev Idx": 6971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042146.641, "dur": 4.020, + "args": { + "External id": 468797,"Sequence number": 5036695, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6972 + } + }, + { + "ph": "f", "id": 348, "pid": 2338707, "tid": 2379456, "ts": 6267454042146.641, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454042148.453, "dur": 2.030, + "args": { + "External id": 468798,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454042149.302, "dur": 1.031, + "args": { + "External id": 468799,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042156.247, "dur": 200.536, + "args": { + "External id": 468800,"Record function id": 0, "Sequence number": 5036694, "Fwd thread id": 1, "Ev Idx": 6975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042157.096, "dur": 189.879, + "args": { + "External id": 468801,"Sequence number": 5036694, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6976 + } + }, + { + "ph": "f", "id": 349, "pid": 2338707, "tid": 2379456, "ts": 6267454042157.096, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454042161.736, "dur": 12.588, + "args": { + "External id": 468802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454042164.248, "dur": 9.193, + "args": { + "External id": 468803,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454042169.607, "dur": 3.456, + "args": { + "External id": 468804,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454042176.251, "dur": 97.511, + "args": { + "External id": 468805,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454042275.572, "dur": 5.705, + "args": { + "External id": 468806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454042276.302, "dur": 4.076, + "args": { + "External id": 468807,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454042278.746, "dur": 1.421, + "args": { + "External id": 468808,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454042283.065, "dur": 6.264, + "args": { + "External id": 468809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454042284.493, "dur": 4.238, + "args": { + "External id": 468810,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454042288.113, "dur": 0.484, + "args": { + "External id": 468811,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454042289.992, "dur": 55.718, + "args": { + "External id": 468812,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042363.750, "dur": 8.366, + "args": { + "External id": 468813,"Record function id": 0, "Sequence number": 5036693, "Fwd thread id": 1, "Ev Idx": 6988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042364.863, "dur": 5.154, + "args": { + "External id": 468814,"Sequence number": 5036693, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6989 + } + }, + { + "ph": "f", "id": 350, "pid": 2338707, "tid": 2379456, "ts": 6267454042364.863, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454042366.656, "dur": 3.186, + "args": { + "External id": 468815,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454042368.189, "dur": 1.501, + "args": { + "External id": 468816,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042376.102, "dur": 11.681, + "args": { + "External id": 468817,"Record function id": 0, "Sequence number": 5036692, "Fwd thread id": 1, "Ev Idx": 6992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042377.208, "dur": 8.142, + "args": { + "External id": 468818,"Sequence number": 5036692, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6993 + } + }, + { + "ph": "f", "id": 351, "pid": 2338707, "tid": 2379456, "ts": 6267454042377.208, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454042378.246, "dur": 6.857, + "args": { + "External id": 468819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454042378.953, "dur": 5.541, + "args": { + "External id": 468820,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454042383.728, "dur": 0.594, + "args": { + "External id": 468821,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454042395.114, "dur": 13.859, + "args": { + "External id": 468822,"Record function id": 0, "Ev Idx": 6997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454042397.126, "dur": 10.818, + "args": { + "External id": 468823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454042400.256, "dur": 7.185, + "args": { + "External id": 468824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454042401.483, "dur": 5.834, + "args": { + "External id": 468825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042413.106, "dur": 6.917, + "args": { + "External id": 468826,"Record function id": 0, "Sequence number": 5036691, "Fwd thread id": 1, "Ev Idx": 7001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042414.674, "dur": 3.168, + "args": { + "External id": 468827,"Sequence number": 5036691, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7002 + } + }, + { + "ph": "f", "id": 352, "pid": 2338707, "tid": 2379456, "ts": 6267454042414.674, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454042415.615, "dur": 2.035, + "args": { + "External id": 468828,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454042416.562, "dur": 0.927, + "args": { + "External id": 468829,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042423.844, "dur": 102.536, + "args": { + "External id": 468830,"Record function id": 0, "Sequence number": 5036690, "Fwd thread id": 1, "Ev Idx": 7005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042427.101, "dur": 92.487, + "args": { + "External id": 468831,"Sequence number": 5036690, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7006 + } + }, + { + "ph": "f", "id": 353, "pid": 2338707, "tid": 2379456, "ts": 6267454042427.101, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454042429.232, "dur": 3.079, + "args": { + "External id": 468832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454042429.790, "dur": 1.985, + "args": { + "External id": 468833,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454042431.056, "dur": 0.599, + "args": { + "External id": 468834,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454042432.942, "dur": 32.277, + "args": { + "External id": 468835,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454042466.474, "dur": 6.142, + "args": { + "External id": 468836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454042467.066, "dur": 4.929, + "args": { + "External id": 468837,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454042471.011, "dur": 0.857, + "args": { + "External id": 468838,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454042474.002, "dur": 3.506, + "args": { + "External id": 468839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454042475.109, "dur": 1.826, + "args": { + "External id": 468840,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454042476.392, "dur": 0.469, + "args": { + "External id": 468841,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454042478.056, "dur": 40.489, + "args": { + "External id": 468842,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042532.146, "dur": 42.842, + "args": { + "External id": 468843,"Record function id": 0, "Sequence number": 5036689, "Fwd thread id": 1, "Ev Idx": 7018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042533.389, "dur": 6.827, + "args": { + "External id": 468844,"Sequence number": 5036689, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7019 + } + }, + { + "ph": "f", "id": 354, "pid": 2338707, "tid": 2379456, "ts": 6267454042533.389, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454042537.606, "dur": 2.416, + "args": { + "External id": 468845,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454042538.668, "dur": 1.197, + "args": { + "External id": 468846,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267454042543.753, "dur": 28.546, + "args": { + "External id": 468847,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042579.661, "dur": 8.588, + "args": { + "External id": 468848,"Record function id": 0, "Sequence number": 5036688, "Fwd thread id": 1, "Ev Idx": 7023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042580.721, "dur": 4.917, + "args": { + "External id": 468849,"Sequence number": 5036688, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7024 + } + }, + { + "ph": "f", "id": 355, "pid": 2338707, "tid": 2379456, "ts": 6267454042580.721, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454042581.558, "dur": 3.839, + "args": { + "External id": 468850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454042582.188, "dur": 2.606, + "args": { + "External id": 468851,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454042584.109, "dur": 0.545, + "args": { + "External id": 468852,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454042616.212, "dur": 11.961, + "args": { + "External id": 468853,"Record function id": 0, "Ev Idx": 7028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454042618.922, "dur": 8.348, + "args": { + "External id": 468854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454042621.033, "dur": 5.725, + "args": { + "External id": 468855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454042624.477, "dur": 2.024, + "args": { + "External id": 468856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042633.078, "dur": 8.090, + "args": { + "External id": 468857,"Record function id": 0, "Sequence number": 5036687, "Fwd thread id": 1, "Ev Idx": 7032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042634.341, "dur": 4.404, + "args": { + "External id": 468858,"Sequence number": 5036687, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7033 + } + }, + { + "ph": "f", "id": 356, "pid": 2338707, "tid": 2379456, "ts": 6267454042634.341, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454042635.899, "dur": 2.660, + "args": { + "External id": 468859,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454042636.623, "dur": 1.823, + "args": { + "External id": 468860,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042645.101, "dur": 122.029, + "args": { + "External id": 468861,"Record function id": 0, "Sequence number": 5036686, "Fwd thread id": 1, "Ev Idx": 7036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042645.979, "dur": 111.884, + "args": { + "External id": 468862,"Sequence number": 5036686, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7037 + } + }, + { + "ph": "f", "id": 357, "pid": 2338707, "tid": 2379456, "ts": 6267454042645.979, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454042648.081, "dur": 6.656, + "args": { + "External id": 468863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454042651.439, "dur": 2.781, + "args": { + "External id": 468864,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454042653.110, "dur": 0.976, + "args": { + "External id": 468865,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454042655.847, "dur": 46.642, + "args": { + "External id": 468866,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454042703.761, "dur": 4.403, + "args": { + "External id": 468867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454042704.616, "dur": 2.832, + "args": { + "External id": 468868,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454042706.221, "dur": 1.082, + "args": { + "External id": 468869,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454042711.704, "dur": 3.819, + "args": { + "External id": 468870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454042713.012, "dur": 1.947, + "args": { + "External id": 468871,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454042714.489, "dur": 0.334, + "args": { + "External id": 468872,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454042716.183, "dur": 40.698, + "args": { + "External id": 468873,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042772.963, "dur": 31.035, + "args": { + "External id": 468874,"Record function id": 0, "Sequence number": 5036685, "Fwd thread id": 1, "Ev Idx": 7049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042774.152, "dur": 5.202, + "args": { + "External id": 468875,"Sequence number": 5036685, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7050 + } + }, + { + "ph": "f", "id": 358, "pid": 2338707, "tid": 2379456, "ts": 6267454042774.152, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454042776.143, "dur": 3.027, + "args": { + "External id": 468876,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454042777.757, "dur": 1.299, + "args": { + "External id": 468877,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454042782.559, "dur": 18.687, + "args": { + "External id": 468878,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042808.339, "dur": 11.755, + "args": { + "External id": 468879,"Record function id": 0, "Sequence number": 5036684, "Fwd thread id": 1, "Ev Idx": 7054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454042812.037, "dur": 5.766, + "args": { + "External id": 468880,"Sequence number": 5036684, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7055 + } + }, + { + "ph": "f", "id": 359, "pid": 2338707, "tid": 2379456, "ts": 6267454042812.037, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454042813.190, "dur": 4.380, + "args": { + "External id": 468881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454042814.441, "dur": 2.502, + "args": { + "External id": 468882,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454042816.200, "dur": 0.576, + "args": { + "External id": 468883,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454042824.690, "dur": 7.006, + "args": { + "External id": 468884,"Record function id": 0, "Ev Idx": 7059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454042826.583, "dur": 4.420, + "args": { + "External id": 468885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454042827.994, "dur": 2.484, + "args": { + "External id": 468886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454042829.149, "dur": 1.211, + "args": { + "External id": 468887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454042836.883, "dur": 469.359, + "args": { + "External id": 468888,"Record function id": 0, "Sequence number": 5036683, "Fwd thread id": 1, "Ev Idx": 7063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454042838.608, "dur": 430.239, + "args": { + "External id": 468889,"Sequence number": 5036683, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7064 + } + }, + { + "ph": "f", "id": 360, "pid": 2338707, "tid": 2379456, "ts": 6267454042838.608, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454042878.231, "dur": 4.692, + "args": { + "External id": 468890,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454042881.715, "dur": 1.047, + "args": { + "External id": 468891,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454042900.846, "dur": 4.823, + "args": { + "External id": 468892,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454042916.766, "dur": 2.167, + "args": { + "External id": 468893,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454043141.263, "dur": 3.449, + "args": { + "External id": 468894,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454043150.108, "dur": 46.240, + "args": { + "External id": 468895,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043167.093, "dur": 1.327, + "args": { + "External id": 468896,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454043203.362, "dur": 37.570, + "args": { + "External id": 468897,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454043205.433, "dur": 35.251, + "args": { + "External id": 468898,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043213.863, "dur": 5.372, + "args": { + "External id": 468899,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454043221.379, "dur": 18.748, + "args": { + "External id": 468900,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267454043246.383, "dur": 3.223, + "args": { + "External id": 468901,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454043248.237, "dur": 1.251, + "args": { + "External id": 468902,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454043257.919, "dur": 3.083, + "args": { + "External id": 468903,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454043259.612, "dur": 1.197, + "args": { + "External id": 468904,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454043283.257, "dur": 18.100, + "args": { + "External id": 468905,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454043320.703, "dur": 11.537, + "args": { + "External id": 468906,"Record function id": 0, "Ev Idx": 7081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454043323.374, "dur": 8.034, + "args": { + "External id": 468907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454043326.467, "dur": 3.692, + "args": { + "External id": 468908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454043327.840, "dur": 2.190, + "args": { + "External id": 468909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454043336.743, "dur": 5.993, + "args": { + "External id": 468910,"Record function id": 0, "Sequence number": 5036682, "Fwd thread id": 1, "Ev Idx": 7085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454043337.974, "dur": 1.583, + "args": { + "External id": 468911,"Sequence number": 5036682, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7086 + } + }, + { + "ph": "f", "id": 361, "pid": 2338707, "tid": 2379456, "ts": 6267454043337.974, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454043346.988, "dur": 505.256, + "args": { + "External id": 468912,"Record function id": 0, "Sequence number": 5036681, "Fwd thread id": 1, "Ev Idx": 7087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454043348.233, "dur": 491.849, + "args": { + "External id": 468913,"Sequence number": 5036681, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7088 + } + }, + { + "ph": "f", "id": 362, "pid": 2338707, "tid": 2379456, "ts": 6267454043348.233, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454043385.356, "dur": 10.200, + "args": { + "External id": 468914,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454043391.504, "dur": 3.711, + "args": { + "External id": 468915,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 7090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454043399.237, "dur": 6.207, + "args": { + "External id": 468916,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454043400.969, "dur": 3.779, + "args": { + "External id": 468917,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043403.655, "dur": 0.866, + "args": { + "External id": 468918,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2379456, + "ts": 6267454043411.933, "dur": 104.193, + "args": { + "External id": 468919,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 7094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454043413.277, "dur": 3.956, + "args": { + "External id": 468920,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 7095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454043414.104, "dur": 2.576, + "args": { + "External id": 468921,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043416.045, "dur": 0.539, + "args": { + "External id": 468922,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2379456, + "ts": 6267454043418.591, "dur": 96.990, + "args": { + "External id": 468923,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 7098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454043420.671, "dur": 93.980, + "args": { + "External id": 468924,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 7099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267454043520.806, "dur": 6.186, + "args": { + "External id": 468925,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 7100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454043524.973, "dur": 1.849, + "args": { + "External id": 468926,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 7101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454043563.376, "dur": 6.905, + "args": { + "External id": 468927,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454043571.880, "dur": 2.675, + "args": { + "External id": 468928,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454043575.669, "dur": 2.699, + "args": { + "External id": 468929,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454043642.995, "dur": 3.427, + "args": { + "External id": 468930,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454043644.102, "dur": 2.054, + "args": { + "External id": 468931,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338707, "tid": 2379456, + "ts": 6267454043674.772, "dur": 142.209, + "args": { + "External id": 468932,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 7107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267454043680.583, "dur": 6.645, + "args": { + "External id": 468933,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043685.324, "dur": 1.083, + "args": { + "External id": 468934,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454043689.057, "dur": 7.864, + "args": { + "External id": 468935,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 7110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043695.374, "dur": 0.784, + "args": { + "External id": 468936,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 7111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2379456, + "ts": 6267454043698.493, "dur": 2.884, + "args": { + "External id": 468937,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 7112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043700.418, "dur": 0.511, + "args": { + "External id": 468938,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454043702.415, "dur": 3.773, + "args": { + "External id": 468939,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043704.664, "dur": 0.728, + "args": { + "External id": 468940,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 7115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454043710.973, "dur": 5.599, + "args": { + "External id": 468941,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 7116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043713.090, "dur": 3.102, + "args": { + "External id": 468942,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 7117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454043718.157, "dur": 6.934, + "args": { + "External id": 468943,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 7118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454043722.727, "dur": 2.144, + "args": { + "External id": 468944,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 7119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454043728.936, "dur": 2.996, + "args": { + "External id": 468945,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 7120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043731.063, "dur": 0.489, + "args": { + "External id": 468946,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 7121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454043732.856, "dur": 2.657, + "args": { + "External id": 468947,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454043733.606, "dur": 1.797, + "args": { + "External id": 468948,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454043736.932, "dur": 66.681, + "args": { + "External id": 468949,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 7124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454043806.003, "dur": 1.254, + "args": { + "External id": 468950,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 7125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338707, "tid": 2379456, + "ts": 6267454043808.292, "dur": 4.014, + "args": { + "External id": 468951,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 7126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043810.892, "dur": 0.585, + "args": { + "External id": 468952,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 7127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454043814.663, "dur": 0.965, + "args": { + "External id": 468953,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 7128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454043866.384, "dur": 13.123, + "args": { + "External id": 468954,"Record function id": 0, "Ev Idx": 7129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454043868.638, "dur": 10.020, + "args": { + "External id": 468955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454043871.083, "dur": 6.548, + "args": { + "External id": 468956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454043872.382, "dur": 5.118, + "args": { + "External id": 468957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454043884.166, "dur": 8.432, + "args": { + "External id": 468958,"Record function id": 0, "Sequence number": 5036680, "Fwd thread id": 1, "Ev Idx": 7133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454043885.419, "dur": 4.614, + "args": { + "External id": 468959,"Sequence number": 5036680, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 7134 + } + }, + { + "ph": "f", "id": 363, "pid": 2338707, "tid": 2379456, "ts": 6267454043885.419, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454043888.136, "dur": 1.636, + "args": { + "External id": 468960,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454043888.813, "dur": 0.812, + "args": { + "External id": 468961,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454043897.006, "dur": 124.191, + "args": { + "External id": 468962,"Record function id": 0, "Sequence number": 5036679, "Fwd thread id": 1, "Ev Idx": 7137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454043898.257, "dur": 115.026, + "args": { + "External id": 468963,"Sequence number": 5036679, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7138 + } + }, + { + "ph": "f", "id": 364, "pid": 2338707, "tid": 2379456, "ts": 6267454043898.257, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454043904.728, "dur": 4.971, + "args": { + "External id": 468964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454043906.389, "dur": 2.612, + "args": { + "External id": 468965,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 7140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043908.165, "dur": 0.676, + "args": { + "External id": 468966,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454043911.035, "dur": 46.912, + "args": { + "External id": 468967,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 7142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454043959.277, "dur": 7.495, + "args": { + "External id": 468968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454043960.397, "dur": 5.743, + "args": { + "External id": 468969,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043964.704, "dur": 1.229, + "args": { + "External id": 468970,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454043968.268, "dur": 4.332, + "args": { + "External id": 468971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454043969.501, "dur": 2.522, + "args": { + "External id": 468972,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454043971.304, "dur": 0.637, + "args": { + "External id": 468973,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454043973.373, "dur": 38.982, + "args": { + "External id": 468974,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 7149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044026.780, "dur": 7.693, + "args": { + "External id": 468975,"Record function id": 0, "Sequence number": 5036678, "Fwd thread id": 1, "Ev Idx": 7150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044028.063, "dur": 4.905, + "args": { + "External id": 468976,"Sequence number": 5036678, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7151 + } + }, + { + "ph": "f", "id": 365, "pid": 2338707, "tid": 2379456, "ts": 6267454044028.063, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454044029.977, "dur": 2.821, + "args": { + "External id": 468977,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454044031.228, "dur": 1.392, + "args": { + "External id": 468978,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044038.757, "dur": 12.464, + "args": { + "External id": 468979,"Record function id": 0, "Sequence number": 5036677, "Fwd thread id": 1, "Ev Idx": 7154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044041.913, "dur": 7.419, + "args": { + "External id": 468980,"Sequence number": 5036677, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7155 + } + }, + { + "ph": "f", "id": 366, "pid": 2338707, "tid": 2379456, "ts": 6267454044041.913, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454044042.766, "dur": 6.331, + "args": { + "External id": 468981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454044043.593, "dur": 4.858, + "args": { + "External id": 468982,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454044045.330, "dur": 2.997, + "args": { + "External id": 468983,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454044098.872, "dur": 9.663, + "args": { + "External id": 468984,"Record function id": 0, "Ev Idx": 7159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454044101.907, "dur": 5.646, + "args": { + "External id": 468985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454044103.878, "dur": 3.197, + "args": { + "External id": 468986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454044104.873, "dur": 1.908, + "args": { + "External id": 468987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044113.880, "dur": 9.916, + "args": { + "External id": 468988,"Record function id": 0, "Sequence number": 5036676, "Fwd thread id": 1, "Ev Idx": 7163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044114.929, "dur": 6.005, + "args": { + "External id": 468989,"Sequence number": 5036676, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 7164 + } + }, + { + "ph": "f", "id": 367, "pid": 2338707, "tid": 2379456, "ts": 6267454044114.929, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454044116.260, "dur": 4.447, + "args": { + "External id": 468990,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454044119.424, "dur": 1.166, + "args": { + "External id": 468991,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044127.813, "dur": 174.232, + "args": { + "External id": 468992,"Record function id": 0, "Sequence number": 5036675, "Fwd thread id": 1, "Ev Idx": 7167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044180.866, "dur": 111.541, + "args": { + "External id": 468993,"Sequence number": 5036675, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7168 + } + }, + { + "ph": "f", "id": 368, "pid": 2338707, "tid": 2379456, "ts": 6267454044180.866, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454044183.777, "dur": 4.094, + "args": { + "External id": 468994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454044184.461, "dur": 2.821, + "args": { + "External id": 468995,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 7170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454044186.456, "dur": 0.700, + "args": { + "External id": 468996,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454044190.993, "dur": 47.932, + "args": { + "External id": 468997,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 7172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454044240.400, "dur": 5.949, + "args": { + "External id": 468998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454044241.241, "dur": 4.384, + "args": { + "External id": 468999,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454044243.061, "dur": 2.358, + "args": { + "External id": 469000,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454044247.746, "dur": 5.878, + "args": { + "External id": 469001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454044248.749, "dur": 4.381, + "args": { + "External id": 469002,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454044252.580, "dur": 0.470, + "args": { + "External id": 469003,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454044254.116, "dur": 37.418, + "args": { + "External id": 469004,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 7179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044307.895, "dur": 37.947, + "args": { + "External id": 469005,"Record function id": 0, "Sequence number": 5036674, "Fwd thread id": 1, "Ev Idx": 7180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044309.051, "dur": 6.300, + "args": { + "External id": 469006,"Sequence number": 5036674, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7181 + } + }, + { + "ph": "f", "id": 369, "pid": 2338707, "tid": 2379456, "ts": 6267454044309.051, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454044310.997, "dur": 4.167, + "args": { + "External id": 469007,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454044311.917, "dur": 3.100, + "args": { + "External id": 469008,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267454044318.861, "dur": 23.566, + "args": { + "External id": 469009,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044350.608, "dur": 11.038, + "args": { + "External id": 469010,"Record function id": 0, "Sequence number": 5036673, "Fwd thread id": 1, "Ev Idx": 7185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044351.784, "dur": 7.574, + "args": { + "External id": 469011,"Sequence number": 5036673, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7186 + } + }, + { + "ph": "f", "id": 370, "pid": 2338707, "tid": 2379456, "ts": 6267454044351.784, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454044352.761, "dur": 6.364, + "args": { + "External id": 469012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454044355.828, "dur": 2.653, + "args": { + "External id": 469013,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454044357.681, "dur": 0.668, + "args": { + "External id": 469014,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454044366.354, "dur": 5.524, + "args": { + "External id": 469015,"Record function id": 0, "Ev Idx": 7190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454044367.907, "dur": 3.353, + "args": { + "External id": 469016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454044368.907, "dur": 1.818, + "args": { + "External id": 469017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454044369.524, "dur": 1.097, + "args": { + "External id": 469018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454044376.920, "dur": 520.729, + "args": { + "External id": 469019,"Record function id": 0, "Sequence number": 5036672, "Fwd thread id": 1, "Ev Idx": 7194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454044381.314, "dur": 473.996, + "args": { + "External id": 469020,"Sequence number": 5036672, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 7195 + } + }, + { + "ph": "f", "id": 371, "pid": 2338707, "tid": 2379456, "ts": 6267454044381.314, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2379456, + "ts": 6267454044411.972, "dur": 37.488, + "args": { + "External id": 469021,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267454044413.634, "dur": 35.492, + "args": { + "External id": 469022,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454044416.649, "dur": 6.892, + "args": { + "External id": 469023,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 7198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454044419.664, "dur": 3.318, + "args": { + "External id": 469024,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454044425.111, "dur": 23.388, + "args": { + "External id": 469025,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454044463.328, "dur": 4.937, + "args": { + "External id": 469026,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454044466.521, "dur": 1.566, + "args": { + "External id": 469027,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454044472.718, "dur": 1.606, + "args": { + "External id": 469028,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454044473.201, "dur": 1.007, + "args": { + "External id": 469029,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454044490.055, "dur": 2.742, + "args": { + "External id": 469030,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454044508.382, "dur": 4.927, + "args": { + "External id": 469031,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454044726.689, "dur": 4.804, + "args": { + "External id": 469032,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454044737.319, "dur": 40.366, + "args": { + "External id": 469033,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454044750.235, "dur": 1.073, + "args": { + "External id": 469034,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454044784.524, "dur": 32.814, + "args": { + "External id": 469035,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454044786.508, "dur": 30.542, + "args": { + "External id": 469036,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454044791.973, "dur": 5.034, + "args": { + "External id": 469037,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454044798.548, "dur": 17.907, + "args": { + "External id": 469038,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267454044824.865, "dur": 2.804, + "args": { + "External id": 469039,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454044826.307, "dur": 1.200, + "args": { + "External id": 469040,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454044836.255, "dur": 3.334, + "args": { + "External id": 469041,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454044837.697, "dur": 1.734, + "args": { + "External id": 469042,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454044842.034, "dur": 2.321, + "args": { + "External id": 469043,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454044842.902, "dur": 1.345, + "args": { + "External id": 469044,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454044876.334, "dur": 19.641, + "args": { + "External id": 469045,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454044915.942, "dur": 11.424, + "args": { + "External id": 469046,"Record function id": 0, "Ev Idx": 7221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454044919.332, "dur": 7.194, + "args": { + "External id": 469047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454044921.723, "dur": 3.674, + "args": { + "External id": 469048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454044922.802, "dur": 2.454, + "args": { + "External id": 469049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044931.899, "dur": 9.137, + "args": { + "External id": 469050,"Record function id": 0, "Sequence number": 5036671, "Fwd thread id": 1, "Ev Idx": 7225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044933.377, "dur": 4.896, + "args": { + "External id": 469051,"Sequence number": 5036671, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7226 + } + }, + { + "ph": "f", "id": 372, "pid": 2338707, "tid": 2379456, "ts": 6267454044933.377, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454044935.073, "dur": 2.995, + "args": { + "External id": 469052,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454044936.294, "dur": 1.582, + "args": { + "External id": 469053,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044944.998, "dur": 226.299, + "args": { + "External id": 469054,"Record function id": 0, "Sequence number": 5036670, "Fwd thread id": 1, "Ev Idx": 7229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454044945.908, "dur": 216.288, + "args": { + "External id": 469055,"Sequence number": 5036670, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7230 + } + }, + { + "ph": "f", "id": 373, "pid": 2338707, "tid": 2379456, "ts": 6267454044945.908, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454044951.531, "dur": 5.631, + "args": { + "External id": 469056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454044953.213, "dur": 3.182, + "args": { + "External id": 469057,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454044955.313, "dur": 0.876, + "args": { + "External id": 469058,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454044958.574, "dur": 83.409, + "args": { + "External id": 469059,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454045043.289, "dur": 9.440, + "args": { + "External id": 469060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454045044.316, "dur": 7.616, + "args": { + "External id": 469061,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454045046.733, "dur": 4.996, + "args": { + "External id": 469062,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454045098.492, "dur": 5.791, + "args": { + "External id": 469063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454045100.790, "dur": 2.466, + "args": { + "External id": 469064,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454045102.256, "dur": 0.656, + "args": { + "External id": 469065,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454045105.038, "dur": 55.720, + "args": { + "External id": 469066,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454045179.550, "dur": 12.202, + "args": { + "External id": 469067,"Record function id": 0, "Sequence number": 5036669, "Fwd thread id": 1, "Ev Idx": 7242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454045180.727, "dur": 8.741, + "args": { + "External id": 469068,"Sequence number": 5036669, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7243 + } + }, + { + "ph": "f", "id": 374, "pid": 2338707, "tid": 2379456, "ts": 6267454045180.727, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454045183.172, "dur": 6.088, + "args": { + "External id": 469069,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454045187.267, "dur": 1.840, + "args": { + "External id": 469070,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454045195.897, "dur": 8.610, + "args": { + "External id": 469071,"Record function id": 0, "Sequence number": 5036668, "Fwd thread id": 1, "Ev Idx": 7246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454045196.700, "dur": 5.437, + "args": { + "External id": 469072,"Sequence number": 5036668, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7247 + } + }, + { + "ph": "f", "id": 375, "pid": 2338707, "tid": 2379456, "ts": 6267454045196.700, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454045197.667, "dur": 4.161, + "args": { + "External id": 469073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454045198.598, "dur": 2.595, + "args": { + "External id": 469074,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454045200.405, "dur": 0.667, + "args": { + "External id": 469075,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454045209.911, "dur": 8.214, + "args": { + "External id": 469076,"Record function id": 0, "Ev Idx": 7251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454045211.673, "dur": 5.810, + "args": { + "External id": 469077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454045214.002, "dur": 3.065, + "args": { + "External id": 469078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454045215.007, "dur": 1.909, + "args": { + "External id": 469079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454045221.655, "dur": 9.985, + "args": { + "External id": 469080,"Record function id": 0, "Sequence number": 5036667, "Fwd thread id": 1, "Ev Idx": 7255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454045225.145, "dur": 4.017, + "args": { + "External id": 469081,"Sequence number": 5036667, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7256 + } + }, + { + "ph": "f", "id": 376, "pid": 2338707, "tid": 2379456, "ts": 6267454045225.145, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454045226.426, "dur": 2.546, + "args": { + "External id": 469082,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454045227.457, "dur": 1.373, + "args": { + "External id": 469083,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454045236.666, "dur": 439.060, + "args": { + "External id": 469084,"Record function id": 0, "Sequence number": 5036666, "Fwd thread id": 1, "Ev Idx": 7259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454045238.416, "dur": 415.131, + "args": { + "External id": 469085,"Sequence number": 5036666, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7260 + } + }, + { + "ph": "f", "id": 377, "pid": 2338707, "tid": 2379456, "ts": 6267454045238.416, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454045258.106, "dur": 8.416, + "args": { + "External id": 469086,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 7261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454045261.836, "dur": 4.253, + "args": { + "External id": 469087,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454045268.902, "dur": 7.279, + "args": { + "External id": 469088,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454045270.753, "dur": 5.238, + "args": { + "External id": 469089,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454045277.826, "dur": 4.299, + "args": { + "External id": 469090,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454045280.007, "dur": 1.861, + "args": { + "External id": 469091,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454045313.193, "dur": 305.461, + "args": { + "External id": 469092,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454045406.113, "dur": 4.538, + "args": { + "External id": 469093,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454045412.813, "dur": 2.975, + "args": { + "External id": 469094,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454045419.665, "dur": 2.034, + "args": { + "External id": 469095,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454045423.057, "dur": 2.634, + "args": { + "External id": 469096,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454045479.979, "dur": 3.257, + "args": { + "External id": 469097,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454045481.537, "dur": 1.561, + "args": { + "External id": 469098,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454045484.981, "dur": 35.336, + "args": { + "External id": 469099,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454045491.517, "dur": 2.507, + "args": { + "External id": 469100,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454045522.113, "dur": 2.083, + "args": { + "External id": 469101,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454045523.402, "dur": 0.691, + "args": { + "External id": 469102,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454045525.125, "dur": 21.490, + "args": { + "External id": 469103,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454045527.704, "dur": 3.260, + "args": { + "External id": 469104,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454045637.437, "dur": 4.806, + "args": { + "External id": 469105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454045646.054, "dur": 1.067, + "args": { + "External id": 469106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2379456, + "ts": 6267454045649.262, "dur": 0.791, + "args": { + "External id": 469107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454045687.891, "dur": 276.079, + "args": { + "External id": 469108,"Record function id": 0, "Sequence number": 5036665, "Fwd thread id": 1, "Ev Idx": 7283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454045689.745, "dur": 265.652, + "args": { + "External id": 469109,"Sequence number": 5036665, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7284 + } + }, + { + "ph": "f", "id": 378, "pid": 2338707, "tid": 2379456, "ts": 6267454045689.745, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267454045714.838, "dur": 53.643, + "args": { + "External id": 469110,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454045719.651, "dur": 5.107, + "args": { + "External id": 469111,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454045726.438, "dur": 41.309, + "args": { + "External id": 469112,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 7287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454045780.542, "dur": 5.798, + "args": { + "External id": 469113,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454045783.078, "dur": 2.934, + "args": { + "External id": 469114,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454045972.447, "dur": 244.332, + "args": { + "External id": 469115,"Record function id": 0, "Sequence number": 5036664, "Fwd thread id": 1, "Ev Idx": 7290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454045974.246, "dur": 232.207, + "args": { + "External id": 469116,"Sequence number": 5036664, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7291 + } + }, + { + "ph": "f", "id": 379, "pid": 2338707, "tid": 2379456, "ts": 6267454045974.246, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2379456, + "ts": 6267454045986.719, "dur": 45.848, + "args": { + "External id": 469117,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454045989.989, "dur": 3.449, + "args": { + "External id": 469118,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454045994.615, "dur": 37.241, + "args": { + "External id": 469119,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 7294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2379456, + "ts": 6267454046041.326, "dur": 6.206, + "args": { + "External id": 469120,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 7295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046044.047, "dur": 3.144, + "args": { + "External id": 469121,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046230.122, "dur": 18.950, + "args": { + "External id": 469122,"Record function id": 0, "Sequence number": 5036663, "Fwd thread id": 1, "Ev Idx": 7297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046232.099, "dur": 13.903, + "args": { + "External id": 469123,"Sequence number": 5036663, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7298 + } + }, + { + "ph": "f", "id": 380, "pid": 2338707, "tid": 2379456, "ts": 6267454046232.099, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454046235.874, "dur": 9.749, + "args": { + "External id": 469124,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454046237.361, "dur": 8.013, + "args": { + "External id": 469125,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046253.496, "dur": 6.251, + "args": { + "External id": 469126,"Record function id": 0, "Sequence number": 5036662, "Fwd thread id": 1, "Ev Idx": 7301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046254.613, "dur": 3.360, + "args": { + "External id": 469127,"Sequence number": 5036662, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7302 + } + }, + { + "ph": "f", "id": 381, "pid": 2338707, "tid": 2379456, "ts": 6267454046254.613, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454046255.824, "dur": 1.980, + "args": { + "External id": 469128,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454046256.780, "dur": 0.894, + "args": { + "External id": 469129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046263.838, "dur": 8.557, + "args": { + "External id": 469130,"Record function id": 0, "Sequence number": 5036661, "Fwd thread id": 1, "Ev Idx": 7305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046264.800, "dur": 6.123, + "args": { + "External id": 469131,"Sequence number": 5036661, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7306 + } + }, + { + "ph": "f", "id": 382, "pid": 2338707, "tid": 2379456, "ts": 6267454046264.800, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454046269.033, "dur": 1.705, + "args": { + "External id": 469132,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454046269.845, "dur": 0.752, + "args": { + "External id": 469133,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046276.436, "dur": 6.317, + "args": { + "External id": 469134,"Record function id": 0, "Sequence number": 5036660, "Fwd thread id": 1, "Ev Idx": 7309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046277.376, "dur": 3.107, + "args": { + "External id": 469135,"Sequence number": 5036660, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7310 + } + }, + { + "ph": "f", "id": 383, "pid": 2338707, "tid": 2379456, "ts": 6267454046277.376, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454046278.626, "dur": 1.673, + "args": { + "External id": 469136,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454046279.162, "dur": 1.026, + "args": { + "External id": 469137,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046286.928, "dur": 191.824, + "args": { + "External id": 469138,"Record function id": 0, "Sequence number": 5036659, "Fwd thread id": 1, "Ev Idx": 7313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046287.825, "dur": 183.253, + "args": { + "External id": 469139,"Sequence number": 5036659, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7314 + } + }, + { + "ph": "f", "id": 384, "pid": 2338707, "tid": 2379456, "ts": 6267454046287.825, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454046292.044, "dur": 10.568, + "args": { + "External id": 469140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454046296.893, "dur": 4.910, + "args": { + "External id": 469141,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046299.766, "dur": 1.740, + "args": { + "External id": 469142,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454046304.533, "dur": 91.894, + "args": { + "External id": 469143,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454046398.407, "dur": 5.540, + "args": { + "External id": 469144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454046399.402, "dur": 3.642, + "args": { + "External id": 469145,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046401.596, "dur": 1.211, + "args": { + "External id": 469146,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454046408.087, "dur": 3.950, + "args": { + "External id": 469147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454046409.000, "dur": 2.437, + "args": { + "External id": 469148,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046410.720, "dur": 0.607, + "args": { + "External id": 469149,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454046413.171, "dur": 56.791, + "args": { + "External id": 469150,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046485.236, "dur": 7.218, + "args": { + "External id": 469151,"Record function id": 0, "Sequence number": 5036658, "Fwd thread id": 1, "Ev Idx": 7326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046486.415, "dur": 4.309, + "args": { + "External id": 469152,"Sequence number": 5036658, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7327 + } + }, + { + "ph": "f", "id": 385, "pid": 2338707, "tid": 2379456, "ts": 6267454046486.415, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454046488.029, "dur": 2.489, + "args": { + "External id": 469153,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454046489.215, "dur": 1.127, + "args": { + "External id": 469154,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046496.798, "dur": 10.392, + "args": { + "External id": 469155,"Record function id": 0, "Sequence number": 5036657, "Fwd thread id": 1, "Ev Idx": 7330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046499.951, "dur": 4.925, + "args": { + "External id": 469156,"Sequence number": 5036657, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7331 + } + }, + { + "ph": "f", "id": 386, "pid": 2338707, "tid": 2379456, "ts": 6267454046499.951, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454046500.842, "dur": 3.767, + "args": { + "External id": 469157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454046501.617, "dur": 2.391, + "args": { + "External id": 469158,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046503.252, "dur": 0.634, + "args": { + "External id": 469159,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454046514.295, "dur": 11.203, + "args": { + "External id": 469160,"Record function id": 0, "Ev Idx": 7335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454046516.158, "dur": 8.329, + "args": { + "External id": 469161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454046519.276, "dur": 4.761, + "args": { + "External id": 469162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454046520.811, "dur": 3.055, + "args": { + "External id": 469163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046529.503, "dur": 9.822, + "args": { + "External id": 469164,"Record function id": 0, "Sequence number": 5036656, "Fwd thread id": 1, "Ev Idx": 7339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046530.642, "dur": 5.805, + "args": { + "External id": 469165,"Sequence number": 5036656, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7340 + } + }, + { + "ph": "f", "id": 387, "pid": 2338707, "tid": 2379456, "ts": 6267454046530.642, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454046531.780, "dur": 4.484, + "args": { + "External id": 469166,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454046535.102, "dur": 1.005, + "args": { + "External id": 469167,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046543.207, "dur": 142.527, + "args": { + "External id": 469168,"Record function id": 0, "Sequence number": 5036655, "Fwd thread id": 1, "Ev Idx": 7343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046544.283, "dur": 131.349, + "args": { + "External id": 469169,"Sequence number": 5036655, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7344 + } + }, + { + "ph": "f", "id": 388, "pid": 2338707, "tid": 2379456, "ts": 6267454046544.283, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454046546.664, "dur": 3.309, + "args": { + "External id": 469170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454046547.373, "dur": 2.064, + "args": { + "External id": 469171,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046548.818, "dur": 0.501, + "args": { + "External id": 469172,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454046553.626, "dur": 32.928, + "args": { + "External id": 469173,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454046587.719, "dur": 27.119, + "args": { + "External id": 469174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454046588.493, "dur": 2.620, + "args": { + "External id": 469175,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046589.748, "dur": 1.234, + "args": { + "External id": 469176,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454046617.334, "dur": 9.168, + "args": { + "External id": 469177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454046618.644, "dur": 7.310, + "args": { + "External id": 469178,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046623.274, "dur": 2.565, + "args": { + "External id": 469179,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454046627.281, "dur": 47.446, + "args": { + "External id": 469180,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046693.205, "dur": 53.007, + "args": { + "External id": 469181,"Record function id": 0, "Sequence number": 5036654, "Fwd thread id": 1, "Ev Idx": 7356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046702.509, "dur": 10.574, + "args": { + "External id": 469182,"Sequence number": 5036654, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7357 + } + }, + { + "ph": "f", "id": 389, "pid": 2338707, "tid": 2379456, "ts": 6267454046702.509, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454046709.896, "dur": 3.029, + "args": { + "External id": 469183,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454046711.260, "dur": 1.552, + "args": { + "External id": 469184,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2379456, + "ts": 6267454046716.725, "dur": 26.336, + "args": { + "External id": 469185,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046751.223, "dur": 11.286, + "args": { + "External id": 469186,"Record function id": 0, "Sequence number": 5036653, "Fwd thread id": 1, "Ev Idx": 7361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046752.274, "dur": 7.844, + "args": { + "External id": 469187,"Sequence number": 5036653, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7362 + } + }, + { + "ph": "f", "id": 390, "pid": 2338707, "tid": 2379456, "ts": 6267454046752.274, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454046755.623, "dur": 4.248, + "args": { + "External id": 469188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454046756.875, "dur": 2.315, + "args": { + "External id": 469189,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046758.428, "dur": 0.613, + "args": { + "External id": 469190,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454046767.702, "dur": 7.517, + "args": { + "External id": 469191,"Record function id": 0, "Ev Idx": 7366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454046769.281, "dur": 5.292, + "args": { + "External id": 469192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454046771.135, "dur": 2.839, + "args": { + "External id": 469193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454046771.881, "dur": 1.940, + "args": { + "External id": 469194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046779.030, "dur": 8.909, + "args": { + "External id": 469195,"Record function id": 0, "Sequence number": 5036652, "Fwd thread id": 1, "Ev Idx": 7370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046780.281, "dur": 5.467, + "args": { + "External id": 469196,"Sequence number": 5036652, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7371 + } + }, + { + "ph": "f", "id": 391, "pid": 2338707, "tid": 2379456, "ts": 6267454046780.281, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454046781.496, "dur": 4.065, + "args": { + "External id": 469197,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454046784.460, "dur": 0.978, + "args": { + "External id": 469198,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046792.057, "dur": 114.318, + "args": { + "External id": 469199,"Record function id": 0, "Sequence number": 5036651, "Fwd thread id": 1, "Ev Idx": 7374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046792.901, "dur": 104.887, + "args": { + "External id": 469200,"Sequence number": 5036651, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7375 + } + }, + { + "ph": "f", "id": 392, "pid": 2338707, "tid": 2379456, "ts": 6267454046792.901, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454046795.214, "dur": 3.401, + "args": { + "External id": 469201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454046796.013, "dur": 2.040, + "args": { + "External id": 469202,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046797.381, "dur": 0.555, + "args": { + "External id": 469203,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454046799.489, "dur": 43.938, + "args": { + "External id": 469204,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454046847.463, "dur": 3.592, + "args": { + "External id": 469205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454046848.088, "dur": 2.212, + "args": { + "External id": 469206,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046849.571, "dur": 0.589, + "args": { + "External id": 469207,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454046852.862, "dur": 3.867, + "args": { + "External id": 469208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454046853.818, "dur": 2.357, + "args": { + "External id": 469209,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046855.495, "dur": 0.603, + "args": { + "External id": 469210,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454046860.002, "dur": 36.949, + "args": { + "External id": 469211,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046911.888, "dur": 28.378, + "args": { + "External id": 469212,"Record function id": 0, "Sequence number": 5036650, "Fwd thread id": 1, "Ev Idx": 7387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046913.101, "dur": 4.314, + "args": { + "External id": 469213,"Sequence number": 5036650, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7388 + } + }, + { + "ph": "f", "id": 393, "pid": 2338707, "tid": 2379456, "ts": 6267454046913.101, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454046914.813, "dur": 2.393, + "args": { + "External id": 469214,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454046915.926, "dur": 1.110, + "args": { + "External id": 469215,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454046920.079, "dur": 16.983, + "args": { + "External id": 469216,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046945.047, "dur": 11.197, + "args": { + "External id": 469217,"Record function id": 0, "Sequence number": 5036649, "Fwd thread id": 1, "Ev Idx": 7392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338707, "tid": 2379456, + "ts": 6267454046946.084, "dur": 8.110, + "args": { + "External id": 469218,"Sequence number": 5036649, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7393 + } + }, + { + "ph": "f", "id": 394, "pid": 2338707, "tid": 2379456, "ts": 6267454046946.084, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2379456, + "ts": 6267454046946.839, "dur": 7.098, + "args": { + "External id": 469219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2379456, + "ts": 6267454046947.882, "dur": 5.291, + "args": { + "External id": 469220,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454046952.525, "dur": 0.464, + "args": { + "External id": 469221,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454046961.027, "dur": 5.996, + "args": { + "External id": 469222,"Record function id": 0, "Ev Idx": 7397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454046962.517, "dur": 3.972, + "args": { + "External id": 469223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454046963.461, "dur": 2.275, + "args": { + "External id": 469224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454046964.205, "dur": 1.436, + "args": { + "External id": 469225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454046972.230, "dur": 485.378, + "args": { + "External id": 469226,"Record function id": 0, "Sequence number": 5036648, "Fwd thread id": 1, "Ev Idx": 7401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454046973.562, "dur": 438.809, + "args": { + "External id": 469227,"Sequence number": 5036648, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7402 + } + }, + { + "ph": "f", "id": 395, "pid": 2338707, "tid": 2379456, "ts": 6267454046973.562, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454047012.684, "dur": 2.190, + "args": { + "External id": 469228,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454047013.450, "dur": 1.243, + "args": { + "External id": 469229,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454047035.687, "dur": 6.580, + "args": { + "External id": 469230,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454047053.588, "dur": 55.274, + "args": { + "External id": 469231,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454047287.898, "dur": 3.574, + "args": { + "External id": 469232,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454047299.372, "dur": 41.734, + "args": { + "External id": 469233,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047312.375, "dur": 1.283, + "args": { + "External id": 469234,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454047347.954, "dur": 36.082, + "args": { + "External id": 469235,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454047350.085, "dur": 33.734, + "args": { + "External id": 469236,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047355.362, "dur": 4.994, + "args": { + "External id": 469237,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454047364.523, "dur": 18.542, + "args": { + "External id": 469238,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2379456, + "ts": 6267454047389.620, "dur": 3.356, + "args": { + "External id": 469239,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454047391.600, "dur": 1.251, + "args": { + "External id": 469240,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454047401.496, "dur": 2.815, + "args": { + "External id": 469241,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454047402.669, "dur": 1.547, + "args": { + "External id": 469242,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454047429.954, "dur": 21.816, + "args": { + "External id": 469243,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454047472.501, "dur": 9.962, + "args": { + "External id": 469244,"Record function id": 0, "Ev Idx": 7419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454047475.057, "dur": 6.613, + "args": { + "External id": 469245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454047477.388, "dur": 3.286, + "args": { + "External id": 469246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454047478.373, "dur": 2.147, + "args": { + "External id": 469247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454047487.728, "dur": 3227.789, + "args": { + "External id": 469248,"Record function id": 0, "Ev Idx": 7423 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2338707, "tid": 2379456, + "ts": 6267454047524.555, "dur": 1066.265, + "args": { + "External id": 469249,"Record function id": 0, "Ev Idx": 7424 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2338707, "tid": 2379456, + "ts": 6267454047554.771, "dur": 1025.612, + "args": { + "External id": 469250,"Record function id": 0, "Ev Idx": 7425 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338707, "tid": 2379456, + "ts": 6267454047570.324, "dur": 991.602, + "args": { + "External id": 469251,"Record function id": 0, "Ev Idx": 7426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454047682.221, "dur": 8.984, + "args": { + "External id": 469252,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454047709.543, "dur": 33.674, + "args": { + "External id": 469253,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047714.701, "dur": 1.792, + "args": { + "External id": 469254,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047718.314, "dur": 3.523, + "args": { + "External id": 469255,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047725.117, "dur": 0.517, + "args": { + "External id": 469256,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047726.616, "dur": 0.662, + "args": { + "External id": 469257,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047728.526, "dur": 0.413, + "args": { + "External id": 469258,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047732.028, "dur": 0.550, + "args": { + "External id": 469259,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047734.103, "dur": 0.522, + "args": { + "External id": 469260,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047735.883, "dur": 1.020, + "args": { + "External id": 469261,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047738.319, "dur": 0.316, + "args": { + "External id": 469262,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454047756.987, "dur": 50.414, + "args": { + "External id": 469263,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267454047848.504, "dur": 137.227, + "args": { + "External id": 469264,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454047860.939, "dur": 6.566, + "args": { + "External id": 469265,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267454047873.682, "dur": 11.244, + "args": { + "External id": 469266,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454047878.398, "dur": 6.103, + "args": { + "External id": 469267,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047882.138, "dur": 0.699, + "args": { + "External id": 469268,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454047892.816, "dur": 28.490, + "args": { + "External id": 469269,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047894.771, "dur": 0.617, + "args": { + "External id": 469270,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047897.912, "dur": 2.146, + "args": { + "External id": 469271,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047901.291, "dur": 0.425, + "args": { + "External id": 469272,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047903.488, "dur": 0.270, + "args": { + "External id": 469273,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047906.284, "dur": 0.409, + "args": { + "External id": 469274,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047908.345, "dur": 0.323, + "args": { + "External id": 469275,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047909.602, "dur": 2.884, + "args": { + "External id": 469276,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047914.115, "dur": 0.456, + "args": { + "External id": 469277,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454047915.435, "dur": 0.336, + "args": { + "External id": 469278,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454047939.511, "dur": 36.727, + "args": { + "External id": 469279,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454048050.334, "dur": 387.362, + "args": { + "External id": 469280,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454048125.468, "dur": 306.458, + "args": { + "External id": 469281,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7456, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454048138.568, "dur": 287.009, + "args": { + "External id": 469282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454048462.710, "dur": 2.713, + "args": { + "External id": 469283,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7458, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454048623.096, "dur": 2067.562, + "args": { + "External id": 469284,"Sequence number": 5036647, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7459 + } + }, + { + "ph": "f", "id": 396, "pid": 2338707, "tid": 2379456, "ts": 6267454048623.096, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454048764.453, "dur": 133.398, + "args": { + "External id": 469285,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454048950.376, "dur": 45.666, + "args": { + "External id": 469286,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454049020.365, "dur": 104.989, + "args": { + "External id": 469287,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454049142.374, "dur": 42.705, + "args": { + "External id": 469288,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454049192.294, "dur": 47.280, + "args": { + "External id": 469289,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454049249.863, "dur": 32.356, + "args": { + "External id": 469290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454049291.104, "dur": 33.510, + "args": { + "External id": 469291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454049356.335, "dur": 27.793, + "args": { + "External id": 469292,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454049406.358, "dur": 31.611, + "args": { + "External id": 469293,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454049465.809, "dur": 22.309, + "args": { + "External id": 469294,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454049503.127, "dur": 16.142, + "args": { + "External id": 469295,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454049532.409, "dur": 43.147, + "args": { + "External id": 469296,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454049579.391, "dur": 56.914, + "args": { + "External id": 469297,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454049675.309, "dur": 288.221, + "args": { + "External id": 469298,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454049767.815, "dur": 9.217, + "args": { + "External id": 469299,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454049779.648, "dur": 4.850, + "args": { + "External id": 469300,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454049786.123, "dur": 2.240, + "args": { + "External id": 469301,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454049789.405, "dur": 2.630, + "args": { + "External id": 469302,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454049839.878, "dur": 6.387, + "args": { + "External id": 469303,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454049842.495, "dur": 3.520, + "args": { + "External id": 469304,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454049848.617, "dur": 37.622, + "args": { + "External id": 469305,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454049855.159, "dur": 2.000, + "args": { + "External id": 469306,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454049890.443, "dur": 1.926, + "args": { + "External id": 469307,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454049891.530, "dur": 0.733, + "args": { + "External id": 469308,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454049893.390, "dur": 20.065, + "args": { + "External id": 469309,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454049898.305, "dur": 0.424, + "args": { + "External id": 469310,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454050004.077, "dur": 33.939, + "args": { + "External id": 469311,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454050099.750, "dur": 24.953, + "args": { + "External id": 469312,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454050136.542, "dur": 58.965, + "args": { + "External id": 469313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454050205.854, "dur": 44.968, + "args": { + "External id": 469314,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454050261.772, "dur": 26.247, + "args": { + "External id": 469315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454050294.538, "dur": 34.780, + "args": { + "External id": 469316,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454050337.867, "dur": 31.911, + "args": { + "External id": 469317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454050379.399, "dur": 34.025, + "args": { + "External id": 469318,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267454050441.265, "dur": 27.339, + "args": { + "External id": 469319,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454050489.670, "dur": 27.688, + "args": { + "External id": 469320,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454050536.838, "dur": 19.477, + "args": { + "External id": 469321,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454050575.189, "dur": 15.856, + "args": { + "External id": 469322,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267454050631.364, "dur": 21.309, + "args": { + "External id": 469323,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050741.719, "dur": 19.587, + "args": { + "External id": 469324,"Record function id": 0, "Ev Idx": 7499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050745.566, "dur": 14.500, + "args": { + "External id": 469325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050750.512, "dur": 8.681, + "args": { + "External id": 469326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050752.254, "dur": 6.845, + "args": { + "External id": 469327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050765.763, "dur": 5.636, + "args": { + "External id": 469328,"Record function id": 0, "Ev Idx": 7503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050767.675, "dur": 3.204, + "args": { + "External id": 469329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050768.605, "dur": 1.578, + "args": { + "External id": 469330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050769.129, "dur": 0.957, + "args": { + "External id": 469331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050775.255, "dur": 4.532, + "args": { + "External id": 469332,"Record function id": 0, "Ev Idx": 7507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050776.719, "dur": 2.555, + "args": { + "External id": 469333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050777.339, "dur": 1.412, + "args": { + "External id": 469334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050777.897, "dur": 0.776, + "args": { + "External id": 469335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050783.618, "dur": 4.401, + "args": { + "External id": 469336,"Record function id": 0, "Ev Idx": 7511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050785.248, "dur": 2.275, + "args": { + "External id": 469337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050785.854, "dur": 1.136, + "args": { + "External id": 469338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050786.213, "dur": 0.698, + "args": { + "External id": 469339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050791.772, "dur": 4.586, + "args": { + "External id": 469340,"Record function id": 0, "Ev Idx": 7515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050793.440, "dur": 2.446, + "args": { + "External id": 469341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050793.985, "dur": 1.076, + "args": { + "External id": 469342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050794.288, "dur": 0.696, + "args": { + "External id": 469343,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050800.030, "dur": 6.503, + "args": { + "External id": 469344,"Record function id": 0, "Ev Idx": 7519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050801.374, "dur": 4.647, + "args": { + "External id": 469345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050801.892, "dur": 3.417, + "args": { + "External id": 469346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050804.563, "dur": 0.667, + "args": { + "External id": 469347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050810.423, "dur": 4.016, + "args": { + "External id": 469348,"Record function id": 0, "Ev Idx": 7523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050811.795, "dur": 2.164, + "args": { + "External id": 469349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050812.374, "dur": 1.095, + "args": { + "External id": 469350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050812.719, "dur": 0.668, + "args": { + "External id": 469351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050818.152, "dur": 6.969, + "args": { + "External id": 469352,"Record function id": 0, "Ev Idx": 7527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050819.394, "dur": 5.209, + "args": { + "External id": 469353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050819.946, "dur": 4.003, + "args": { + "External id": 469354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050820.424, "dur": 3.445, + "args": { + "External id": 469355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050828.791, "dur": 4.415, + "args": { + "External id": 469356,"Record function id": 0, "Ev Idx": 7531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454050830.033, "dur": 2.726, + "args": { + "External id": 469357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050830.686, "dur": 1.600, + "args": { + "External id": 469358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454050831.289, "dur": 0.920, + "args": { + "External id": 469359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454050837.597, "dur": 278508.628, + "args": { + "External id": 469360,"Record function id": 0, "Sequence number": 5036646, "Fwd thread id": 1, "Ev Idx": 7535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454050838.990, "dur": 278497.131, + "args": { + "External id": 469361,"Sequence number": 5036646, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7536 + } + }, + { + "ph": "f", "id": 397, "pid": 2338707, "tid": 2379456, "ts": 6267454050838.990, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2338707, "tid": 2379456, + "ts": 6267454050877.762, "dur": 48.558, + "args": { + "External id": 469362,"Record function id": 0, "Ev Idx": 7537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2338707, "tid": 2379456, + "ts": 6267454050936.220, "dur": 77.433, + "args": { + "External id": 469363,"Record function id": 0, "Ev Idx": 7538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2338707, "tid": 2379456, + "ts": 6267454051020.355, "dur": 278306.539, + "args": { + "External id": 469364,"Record function id": 0, "Ev Idx": 7539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454051126.196, "dur": 11.404, + "args": { + "External id": 469365,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454051155.477, "dur": 5.679, + "args": { + "External id": 469366,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454051180.300, "dur": 277032.211, + "args": { + "External id": 469367,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454051197.240, "dur": 276998.135, + "args": { + "External id": 469368,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454051324.327, "dur": 6.290, + "args": { + "External id": 469369,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454051355.187, "dur": 276779.006, + "args": { + "External id": 469370,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454051358.744, "dur": 276773.958, + "args": { + "External id": 469371,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454051364.118, "dur": 11.536, + "args": { + "External id": 469372,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454051378.187, "dur": 276746.624, + "args": { + "External id": 469373,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454328364.655, "dur": 20.069, + "args": { + "External id": 469374,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454328372.626, "dur": 11.537, + "args": { + "External id": 469375,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454328431.585, "dur": 394.289, + "args": { + "External id": 469376,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454328476.504, "dur": 342.609, + "args": { + "External id": 469377,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7552, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454328494.652, "dur": 317.653, + "args": { + "External id": 469378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454328853.240, "dur": 2.380, + "args": { + "External id": 469379,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7554, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454328927.786, "dur": 10.947, + "args": { + "External id": 469380,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454328955.479, "dur": 41.161, + "args": { + "External id": 469381,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454329009.855, "dur": 4.170, + "args": { + "External id": 469382,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454329021.107, "dur": 14.186, + "args": { + "External id": 469383,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454329042.547, "dur": 1.286, + "args": { + "External id": 469384,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454329049.291, "dur": 51.011, + "args": { + "External id": 469385,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454329110.899, "dur": 1.904, + "args": { + "External id": 469386,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454329118.394, "dur": 14.765, + "args": { + "External id": 469387,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454329141.128, "dur": 1.075, + "args": { + "External id": 469388,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454329146.673, "dur": 12.032, + "args": { + "External id": 469389,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454329163.714, "dur": 1.899, + "args": { + "External id": 469390,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454329170.147, "dur": 12.913, + "args": { + "External id": 469391,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454329188.853, "dur": 1.129, + "args": { + "External id": 469392,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454329194.204, "dur": 11.647, + "args": { + "External id": 469393,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454329210.700, "dur": 1.279, + "args": { + "External id": 469394,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454329219.800, "dur": 13.257, + "args": { + "External id": 469395,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454329237.705, "dur": 1.029, + "args": { + "External id": 469396,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454329242.772, "dur": 11.759, + "args": { + "External id": 469397,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454329364.454, "dur": 3443.670, + "args": { + "External id": 469398,"Record function id": 0, "Ev Idx": 7573 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2338707, "tid": 2379456, + "ts": 6267454329387.701, "dur": 1261.535, + "args": { + "External id": 469399,"Record function id": 0, "Ev Idx": 7574 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338707, "tid": 2379456, + "ts": 6267454329406.808, "dur": 408.510, + "args": { + "External id": 469400,"Record function id": 0, "Ev Idx": 7575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454329497.805, "dur": 7.737, + "args": { + "External id": 469401,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454329509.804, "dur": 0.806, + "args": { + "External id": 469402,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454329512.734, "dur": 1.184, + "args": { + "External id": 469403,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454329516.275, "dur": 1.046, + "args": { + "External id": 469404,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454329518.748, "dur": 1.089, + "args": { + "External id": 469405,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454329524.351, "dur": 0.860, + "args": { + "External id": 469406,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454329526.791, "dur": 0.997, + "args": { + "External id": 469407,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454329536.224, "dur": 1.071, + "args": { + "External id": 469408,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454329538.955, "dur": 3.363, + "args": { + "External id": 469409,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454329546.302, "dur": 0.849, + "args": { + "External id": 469410,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454329566.328, "dur": 207.342, + "args": { + "External id": 469411,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454329584.822, "dur": 182.633, + "args": { + "External id": 469412,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454329636.558, "dur": 16.660, + "args": { + "External id": 469413,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454329657.587, "dur": 77.801, + "args": { + "External id": 469414,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454329660.587, "dur": 74.406, + "args": { + "External id": 469415,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454329665.124, "dur": 7.466, + "args": { + "External id": 469416,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454329674.308, "dur": 59.826, + "args": { + "External id": 469417,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2338707, "tid": 2379456, + "ts": 6267454329910.678, "dur": 729.980, + "args": { + "External id": 469418,"Record function id": 0, "Ev Idx": 7593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338707, "tid": 2379456, + "ts": 6267454329930.348, "dur": 692.751, + "args": { + "External id": 469419,"Record function id": 0, "Ev Idx": 7594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454329996.067, "dur": 5.837, + "args": { + "External id": 469420,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454330019.423, "dur": 31.459, + "args": { + "External id": 469421,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330025.057, "dur": 1.865, + "args": { + "External id": 469422,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330028.766, "dur": 1.503, + "args": { + "External id": 469423,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330031.712, "dur": 0.271, + "args": { + "External id": 469424,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330033.675, "dur": 3.025, + "args": { + "External id": 469425,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330039.815, "dur": 0.388, + "args": { + "External id": 469426,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330041.248, "dur": 0.275, + "args": { + "External id": 469427,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330043.103, "dur": 0.365, + "args": { + "External id": 469428,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330045.105, "dur": 0.404, + "args": { + "External id": 469429,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330046.740, "dur": 0.351, + "args": { + "External id": 469430,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454330102.070, "dur": 51.176, + "args": { + "External id": 469431,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267454330192.110, "dur": 136.508, + "args": { + "External id": 469432,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454330204.728, "dur": 5.115, + "args": { + "External id": 469433,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267454330216.122, "dur": 14.195, + "args": { + "External id": 469434,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454330223.293, "dur": 6.564, + "args": { + "External id": 469435,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330227.157, "dur": 0.732, + "args": { + "External id": 469436,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454330238.499, "dur": 31.980, + "args": { + "External id": 469437,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330240.656, "dur": 3.134, + "args": { + "External id": 469438,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330247.508, "dur": 0.680, + "args": { + "External id": 469439,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330249.468, "dur": 0.519, + "args": { + "External id": 469440,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330251.583, "dur": 1.487, + "args": { + "External id": 469441,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330254.481, "dur": 0.436, + "args": { + "External id": 469442,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330256.793, "dur": 0.330, + "args": { + "External id": 469443,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330260.250, "dur": 0.413, + "args": { + "External id": 469444,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330262.381, "dur": 0.549, + "args": { + "External id": 469445,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454330264.078, "dur": 2.471, + "args": { + "External id": 469446,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454330283.898, "dur": 35.433, + "args": { + "External id": 469447,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454330383.436, "dur": 136.772, + "args": { + "External id": 469448,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454330417.545, "dur": 98.613, + "args": { + "External id": 469449,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7624, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454330428.561, "dur": 82.575, + "args": { + "External id": 469450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454330541.147, "dur": 1.966, + "args": { + "External id": 469451,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7626, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454330658.747, "dur": 2124.606, + "args": { + "External id": 469452,"Sequence number": 5036645, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7627 + } + }, + { + "ph": "f", "id": 398, "pid": 2338707, "tid": 2379456, "ts": 6267454330658.747, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454330792.329, "dur": 134.488, + "args": { + "External id": 469453,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454330981.084, "dur": 46.908, + "args": { + "External id": 469454,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454331049.808, "dur": 111.003, + "args": { + "External id": 469455,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454331179.864, "dur": 37.929, + "args": { + "External id": 469456,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454331225.339, "dur": 36.789, + "args": { + "External id": 469457,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454331269.831, "dur": 33.040, + "args": { + "External id": 469458,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454331314.054, "dur": 33.293, + "args": { + "External id": 469459,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454331379.087, "dur": 29.560, + "args": { + "External id": 469460,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454331432.704, "dur": 32.844, + "args": { + "External id": 469461,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454331489.956, "dur": 23.870, + "args": { + "External id": 469462,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454331530.820, "dur": 19.074, + "args": { + "External id": 469463,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454331560.565, "dur": 61.123, + "args": { + "External id": 469464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454331628.188, "dur": 42.419, + "args": { + "External id": 469465,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454331708.644, "dur": 327.739, + "args": { + "External id": 469466,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454331802.340, "dur": 8.237, + "args": { + "External id": 469467,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454331812.864, "dur": 3.022, + "args": { + "External id": 469468,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454331817.055, "dur": 2.217, + "args": { + "External id": 469469,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454331820.471, "dur": 4.646, + "args": { + "External id": 469470,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454331891.355, "dur": 7.488, + "args": { + "External id": 469471,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454331893.994, "dur": 4.119, + "args": { + "External id": 469472,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454331900.854, "dur": 42.699, + "args": { + "External id": 469473,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454331908.118, "dur": 3.563, + "args": { + "External id": 469474,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454331945.848, "dur": 1.963, + "args": { + "External id": 469475,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454331946.766, "dur": 0.902, + "args": { + "External id": 469476,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454331951.487, "dur": 18.556, + "args": { + "External id": 469477,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454331953.872, "dur": 0.556, + "args": { + "External id": 469478,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454332138.111, "dur": 38.431, + "args": { + "External id": 469479,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454332201.158, "dur": 18.816, + "args": { + "External id": 469480,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454332229.859, "dur": 60.763, + "args": { + "External id": 469481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454332297.924, "dur": 45.178, + "args": { + "External id": 469482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454332354.829, "dur": 26.200, + "args": { + "External id": 469483,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454332387.354, "dur": 35.718, + "args": { + "External id": 469484,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454332430.952, "dur": 32.704, + "args": { + "External id": 469485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454332470.964, "dur": 34.008, + "args": { + "External id": 469486,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267454332528.204, "dur": 32.601, + "args": { + "External id": 469487,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454332582.829, "dur": 45.308, + "args": { + "External id": 469488,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454332648.260, "dur": 21.692, + "args": { + "External id": 469489,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454332691.619, "dur": 15.949, + "args": { + "External id": 469490,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267454332730.056, "dur": 17.651, + "args": { + "External id": 469491,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332834.757, "dur": 17.430, + "args": { + "External id": 469492,"Record function id": 0, "Ev Idx": 7667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332838.657, "dur": 12.421, + "args": { + "External id": 469493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332843.566, "dur": 6.173, + "args": { + "External id": 469494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332845.182, "dur": 4.387, + "args": { + "External id": 469495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332856.882, "dur": 5.390, + "args": { + "External id": 469496,"Record function id": 0, "Ev Idx": 7671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332858.708, "dur": 3.062, + "args": { + "External id": 469497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332859.546, "dur": 1.625, + "args": { + "External id": 469498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332860.006, "dur": 1.027, + "args": { + "External id": 469499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332866.200, "dur": 7.183, + "args": { + "External id": 469500,"Record function id": 0, "Ev Idx": 7675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332867.459, "dur": 5.425, + "args": { + "External id": 469501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332868.076, "dur": 4.325, + "args": { + "External id": 469502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332868.849, "dur": 3.459, + "args": { + "External id": 469503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332877.211, "dur": 48.067, + "args": { + "External id": 469504,"Record function id": 0, "Ev Idx": 7679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332921.855, "dur": 2.885, + "args": { + "External id": 469505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332922.652, "dur": 1.386, + "args": { + "External id": 469506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332923.074, "dur": 0.883, + "args": { + "External id": 469507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332929.074, "dur": 4.732, + "args": { + "External id": 469508,"Record function id": 0, "Ev Idx": 7683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332930.743, "dur": 2.575, + "args": { + "External id": 469509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332931.344, "dur": 1.341, + "args": { + "External id": 469510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332931.729, "dur": 0.877, + "args": { + "External id": 469511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332937.474, "dur": 6.854, + "args": { + "External id": 469512,"Record function id": 0, "Ev Idx": 7687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332938.731, "dur": 5.103, + "args": { + "External id": 469513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332939.289, "dur": 3.941, + "args": { + "External id": 469514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332942.460, "dur": 0.634, + "args": { + "External id": 469515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332948.659, "dur": 4.379, + "args": { + "External id": 469516,"Record function id": 0, "Ev Idx": 7691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332950.241, "dur": 2.315, + "args": { + "External id": 469517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332950.799, "dur": 1.285, + "args": { + "External id": 469518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332951.248, "dur": 0.741, + "args": { + "External id": 469519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332957.121, "dur": 4.168, + "args": { + "External id": 469520,"Record function id": 0, "Ev Idx": 7695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332958.530, "dur": 2.288, + "args": { + "External id": 469521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332959.189, "dur": 1.152, + "args": { + "External id": 469522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332959.589, "dur": 0.676, + "args": { + "External id": 469523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332965.271, "dur": 4.296, + "args": { + "External id": 469524,"Record function id": 0, "Ev Idx": 7699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454332966.500, "dur": 2.562, + "args": { + "External id": 469525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332966.998, "dur": 1.583, + "args": { + "External id": 469526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454332967.846, "dur": 0.659, + "args": { + "External id": 469527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454332974.133, "dur": 87296.884, + "args": { + "External id": 469528,"Record function id": 0, "Sequence number": 5036644, "Fwd thread id": 1, "Ev Idx": 7703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454332975.972, "dur": 87283.840, + "args": { + "External id": 469529,"Sequence number": 5036644, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7704 + } + }, + { + "ph": "f", "id": 399, "pid": 2338707, "tid": 2379456, "ts": 6267454332975.972, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2338707, "tid": 2379456, + "ts": 6267454333012.888, "dur": 85.193, + "args": { + "External id": 469530,"Record function id": 0, "Ev Idx": 7705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2338707, "tid": 2379456, + "ts": 6267454333111.607, "dur": 76.345, + "args": { + "External id": 469531,"Record function id": 0, "Ev Idx": 7706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2338707, "tid": 2379456, + "ts": 6267454333195.222, "dur": 87055.283, + "args": { + "External id": 469532,"Record function id": 0, "Ev Idx": 7707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454333305.811, "dur": 9.368, + "args": { + "External id": 469533,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454333328.166, "dur": 8.219, + "args": { + "External id": 469534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454333356.389, "dur": 85663.101, + "args": { + "External id": 469535,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454333373.114, "dur": 85629.431, + "args": { + "External id": 469536,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454333499.267, "dur": 20.430, + "args": { + "External id": 469537,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454333544.241, "dur": 85401.817, + "args": { + "External id": 469538,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454333547.785, "dur": 85396.119, + "args": { + "External id": 469539,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454333566.784, "dur": 15.551, + "args": { + "External id": 469540,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454333585.373, "dur": 85355.239, + "args": { + "External id": 469541,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454419219.307, "dur": 16.785, + "args": { + "External id": 469542,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454419224.752, "dur": 10.516, + "args": { + "External id": 469543,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454419278.397, "dur": 465.600, + "args": { + "External id": 469544,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454419320.690, "dur": 416.519, + "args": { + "External id": 469545,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7720, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454419341.254, "dur": 387.831, + "args": { + "External id": 469546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454419774.352, "dur": 2.353, + "args": { + "External id": 469547,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7722, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454419850.913, "dur": 7.860, + "args": { + "External id": 469548,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454419873.061, "dur": 38.082, + "args": { + "External id": 469549,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454419923.057, "dur": 2.128, + "args": { + "External id": 469550,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454419931.646, "dur": 15.040, + "args": { + "External id": 469551,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454419952.991, "dur": 1.255, + "args": { + "External id": 469552,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454419960.931, "dur": 12.277, + "args": { + "External id": 469553,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454419979.052, "dur": 1.138, + "args": { + "External id": 469554,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454419984.786, "dur": 13.223, + "args": { + "External id": 469555,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420003.220, "dur": 3.878, + "args": { + "External id": 469556,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454420011.844, "dur": 11.429, + "args": { + "External id": 469557,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420027.973, "dur": 1.851, + "args": { + "External id": 469558,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454420033.647, "dur": 12.618, + "args": { + "External id": 469559,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420091.680, "dur": 2.001, + "args": { + "External id": 469560,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454420108.174, "dur": 16.845, + "args": { + "External id": 469561,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420133.931, "dur": 1.034, + "args": { + "External id": 469562,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454420139.488, "dur": 12.649, + "args": { + "External id": 469563,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420157.171, "dur": 0.954, + "args": { + "External id": 469564,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454420165.026, "dur": 11.418, + "args": { + "External id": 469565,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454420289.906, "dur": 3443.108, + "args": { + "External id": 469566,"Record function id": 0, "Ev Idx": 7741 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2338707, "tid": 2379456, + "ts": 6267454420314.733, "dur": 1251.208, + "args": { + "External id": 469567,"Record function id": 0, "Ev Idx": 7742 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338707, "tid": 2379456, + "ts": 6267454420333.387, "dur": 405.331, + "args": { + "External id": 469568,"Record function id": 0, "Ev Idx": 7743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454420425.705, "dur": 6.957, + "args": { + "External id": 469569,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454420436.694, "dur": 0.858, + "args": { + "External id": 469570,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454420440.314, "dur": 0.944, + "args": { + "External id": 469571,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454420443.091, "dur": 3.122, + "args": { + "External id": 469572,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454420448.237, "dur": 1.080, + "args": { + "External id": 469573,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454420450.778, "dur": 1.190, + "args": { + "External id": 469574,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454420453.934, "dur": 1.068, + "args": { + "External id": 469575,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454420458.828, "dur": 1.022, + "args": { + "External id": 469576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454420461.727, "dur": 0.810, + "args": { + "External id": 469577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454420464.101, "dur": 0.903, + "args": { + "External id": 469578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454420484.964, "dur": 214.770, + "args": { + "External id": 469579,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454420504.399, "dur": 189.142, + "args": { + "External id": 469580,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454420535.074, "dur": 17.702, + "args": { + "External id": 469581,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454420559.564, "dur": 101.746, + "args": { + "External id": 469582,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454420562.783, "dur": 98.037, + "args": { + "External id": 469583,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420567.194, "dur": 9.901, + "args": { + "External id": 469584,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454420579.067, "dur": 80.310, + "args": { + "External id": 469585,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7760 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2338707, "tid": 2379456, + "ts": 6267454420834.337, "dur": 722.728, + "args": { + "External id": 469586,"Record function id": 0, "Ev Idx": 7761 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338707, "tid": 2379456, + "ts": 6267454420854.717, "dur": 687.176, + "args": { + "External id": 469587,"Record function id": 0, "Ev Idx": 7762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454420924.300, "dur": 7.384, + "args": { + "External id": 469588,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454420949.921, "dur": 39.120, + "args": { + "External id": 469589,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420955.609, "dur": 6.572, + "args": { + "External id": 469590,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420964.663, "dur": 0.707, + "args": { + "External id": 469591,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420966.806, "dur": 0.679, + "args": { + "External id": 469592,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420971.260, "dur": 0.336, + "args": { + "External id": 469593,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420972.625, "dur": 0.632, + "args": { + "External id": 469594,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420974.767, "dur": 0.615, + "args": { + "External id": 469595,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420977.982, "dur": 2.737, + "args": { + "External id": 469596,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420982.082, "dur": 0.287, + "args": { + "External id": 469597,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454420983.291, "dur": 1.938, + "args": { + "External id": 469598,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454421000.697, "dur": 47.545, + "args": { + "External id": 469599,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267454421128.161, "dur": 133.856, + "args": { + "External id": 469600,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454421141.556, "dur": 6.421, + "args": { + "External id": 469601,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267454421154.822, "dur": 12.471, + "args": { + "External id": 469602,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454421160.000, "dur": 6.802, + "args": { + "External id": 469603,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454421163.826, "dur": 0.945, + "args": { + "External id": 469604,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454421175.516, "dur": 26.534, + "args": { + "External id": 469605,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454421177.593, "dur": 0.832, + "args": { + "External id": 469606,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454421179.952, "dur": 0.640, + "args": { + "External id": 469607,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454421181.670, "dur": 2.060, + "args": { + "External id": 469608,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454421184.752, "dur": 2.810, + "args": { + "External id": 469609,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454421188.630, "dur": 0.740, + "args": { + "External id": 469610,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454421192.044, "dur": 0.331, + "args": { + "External id": 469611,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454421193.432, "dur": 0.590, + "args": { + "External id": 469612,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454421195.083, "dur": 0.529, + "args": { + "External id": 469613,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454421197.651, "dur": 0.647, + "args": { + "External id": 469614,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454421214.687, "dur": 37.538, + "args": { + "External id": 469615,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454421317.990, "dur": 138.708, + "args": { + "External id": 469616,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454421352.155, "dur": 100.513, + "args": { + "External id": 469617,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7792, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454421366.447, "dur": 81.264, + "args": { + "External id": 469618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454421478.816, "dur": 2.189, + "args": { + "External id": 469619,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7794, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454421574.624, "dur": 2132.004, + "args": { + "External id": 469620,"Sequence number": 5036643, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7795 + } + }, + { + "ph": "f", "id": 400, "pid": 2338707, "tid": 2379456, "ts": 6267454421574.624, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454421725.779, "dur": 131.016, + "args": { + "External id": 469621,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454421910.150, "dur": 44.960, + "args": { + "External id": 469622,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454421974.903, "dur": 60.219, + "args": { + "External id": 469623,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454422045.834, "dur": 89.017, + "args": { + "External id": 469624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454422149.493, "dur": 42.120, + "args": { + "External id": 469625,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454422200.048, "dur": 32.451, + "args": { + "External id": 469626,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454422241.229, "dur": 34.275, + "args": { + "External id": 469627,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454422309.096, "dur": 28.202, + "args": { + "External id": 469628,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454422360.778, "dur": 32.708, + "args": { + "External id": 469629,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454422418.730, "dur": 23.179, + "args": { + "External id": 469630,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454422459.853, "dur": 16.704, + "args": { + "External id": 469631,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454422484.960, "dur": 46.807, + "args": { + "External id": 469632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454422536.282, "dur": 37.428, + "args": { + "External id": 469633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454422631.673, "dur": 322.654, + "args": { + "External id": 469634,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454422748.113, "dur": 9.980, + "args": { + "External id": 469635,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454422761.393, "dur": 3.157, + "args": { + "External id": 469636,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454422765.939, "dur": 2.040, + "args": { + "External id": 469637,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454422769.248, "dur": 4.457, + "args": { + "External id": 469638,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454422832.190, "dur": 6.044, + "args": { + "External id": 469639,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454422834.454, "dur": 3.538, + "args": { + "External id": 469640,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454422842.939, "dur": 39.997, + "args": { + "External id": 469641,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454422849.323, "dur": 1.772, + "args": { + "External id": 469642,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454422884.671, "dur": 1.317, + "args": { + "External id": 469643,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454422885.326, "dur": 0.574, + "args": { + "External id": 469644,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454422887.130, "dur": 19.073, + "args": { + "External id": 469645,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454422889.348, "dur": 3.009, + "args": { + "External id": 469646,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454422998.768, "dur": 33.694, + "args": { + "External id": 469647,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454423095.690, "dur": 25.681, + "args": { + "External id": 469648,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454423134.626, "dur": 64.089, + "args": { + "External id": 469649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454423206.677, "dur": 48.964, + "args": { + "External id": 469650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454423268.458, "dur": 27.188, + "args": { + "External id": 469651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454423302.729, "dur": 37.540, + "args": { + "External id": 469652,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454423348.579, "dur": 32.294, + "args": { + "External id": 469653,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454423388.327, "dur": 34.038, + "args": { + "External id": 469654,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267454423451.409, "dur": 28.732, + "args": { + "External id": 469655,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454423500.382, "dur": 30.418, + "args": { + "External id": 469656,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454423549.103, "dur": 20.703, + "args": { + "External id": 469657,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454423590.742, "dur": 36.977, + "args": { + "External id": 469658,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267454423650.440, "dur": 19.957, + "args": { + "External id": 469659,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423759.248, "dur": 18.278, + "args": { + "External id": 469660,"Record function id": 0, "Ev Idx": 7835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423763.402, "dur": 12.856, + "args": { + "External id": 469661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423768.582, "dur": 6.596, + "args": { + "External id": 469662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423770.397, "dur": 4.637, + "args": { + "External id": 469663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423782.008, "dur": 5.663, + "args": { + "External id": 469664,"Record function id": 0, "Ev Idx": 7839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423783.866, "dur": 3.264, + "args": { + "External id": 469665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423785.022, "dur": 1.548, + "args": { + "External id": 469666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423785.585, "dur": 0.877, + "args": { + "External id": 469667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423791.797, "dur": 7.593, + "args": { + "External id": 469668,"Record function id": 0, "Ev Idx": 7843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423793.233, "dur": 5.598, + "args": { + "External id": 469669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423793.818, "dur": 4.380, + "args": { + "External id": 469670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423794.732, "dur": 3.357, + "args": { + "External id": 469671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423803.210, "dur": 4.370, + "args": { + "External id": 469672,"Record function id": 0, "Ev Idx": 7847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423804.514, "dur": 2.580, + "args": { + "External id": 469673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423805.262, "dur": 1.328, + "args": { + "External id": 469674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423805.813, "dur": 0.703, + "args": { + "External id": 469675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423811.319, "dur": 4.407, + "args": { + "External id": 469676,"Record function id": 0, "Ev Idx": 7851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423812.683, "dur": 2.537, + "args": { + "External id": 469677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423813.408, "dur": 1.015, + "args": { + "External id": 469678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423813.718, "dur": 0.627, + "args": { + "External id": 469679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423819.476, "dur": 5.031, + "args": { + "External id": 469680,"Record function id": 0, "Ev Idx": 7855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423821.087, "dur": 2.957, + "args": { + "External id": 469681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423821.824, "dur": 1.590, + "args": { + "External id": 469682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423822.534, "dur": 0.803, + "args": { + "External id": 469683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423828.386, "dur": 4.831, + "args": { + "External id": 469684,"Record function id": 0, "Ev Idx": 7859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423829.887, "dur": 2.811, + "args": { + "External id": 469685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423830.655, "dur": 1.546, + "args": { + "External id": 469686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423831.117, "dur": 1.000, + "args": { + "External id": 469687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423836.875, "dur": 7.653, + "args": { + "External id": 469688,"Record function id": 0, "Ev Idx": 7863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423838.465, "dur": 5.599, + "args": { + "External id": 469689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423839.289, "dur": 4.268, + "args": { + "External id": 469690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423842.471, "dur": 0.967, + "args": { + "External id": 469691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423848.167, "dur": 4.145, + "args": { + "External id": 469692,"Record function id": 0, "Ev Idx": 7867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454423849.342, "dur": 2.502, + "args": { + "External id": 469693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423850.040, "dur": 1.337, + "args": { + "External id": 469694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454423850.589, "dur": 0.687, + "args": { + "External id": 469695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454423857.144, "dur": 79496.784, + "args": { + "External id": 469696,"Record function id": 0, "Sequence number": 5036642, "Fwd thread id": 1, "Ev Idx": 7871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454423858.535, "dur": 79484.185, + "args": { + "External id": 469697,"Sequence number": 5036642, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7872 + } + }, + { + "ph": "f", "id": 401, "pid": 2338707, "tid": 2379456, "ts": 6267454423858.535, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2338707, "tid": 2379456, + "ts": 6267454423894.651, "dur": 45.581, + "args": { + "External id": 469698,"Record function id": 0, "Ev Idx": 7873 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2338707, "tid": 2379456, + "ts": 6267454423950.156, "dur": 73.517, + "args": { + "External id": 469699,"Record function id": 0, "Ev Idx": 7874 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2338707, "tid": 2379456, + "ts": 6267454424030.469, "dur": 79303.418, + "args": { + "External id": 469700,"Record function id": 0, "Ev Idx": 7875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454424182.980, "dur": 8.928, + "args": { + "External id": 469701,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454424204.894, "dur": 7.939, + "args": { + "External id": 469702,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454424233.093, "dur": 77910.377, + "args": { + "External id": 469703,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454424249.770, "dur": 77878.057, + "args": { + "External id": 469704,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454424363.199, "dur": 30.143, + "args": { + "External id": 469705,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454424425.056, "dur": 77622.900, + "args": { + "External id": 469706,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454424428.612, "dur": 77617.374, + "args": { + "External id": 469707,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454424437.445, "dur": 16.772, + "args": { + "External id": 469708,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454424456.686, "dur": 77585.432, + "args": { + "External id": 469709,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454502267.441, "dur": 14.330, + "args": { + "External id": 469710,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454502271.615, "dur": 9.767, + "args": { + "External id": 469711,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454502318.367, "dur": 503.822, + "args": { + "External id": 469712,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454502356.302, "dur": 458.491, + "args": { + "External id": 469713,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7888, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454502371.656, "dur": 435.361, + "args": { + "External id": 469714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454502852.631, "dur": 2.984, + "args": { + "External id": 469715,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7890, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454502933.016, "dur": 8.088, + "args": { + "External id": 469716,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454502955.223, "dur": 41.015, + "args": { + "External id": 469717,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454503008.990, "dur": 1.860, + "args": { + "External id": 469718,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454503017.159, "dur": 15.447, + "args": { + "External id": 469719,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454503039.464, "dur": 1.165, + "args": { + "External id": 469720,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454503088.788, "dur": 18.835, + "args": { + "External id": 469721,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454503119.601, "dur": 1.622, + "args": { + "External id": 469722,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454503126.847, "dur": 14.324, + "args": { + "External id": 469723,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454503146.316, "dur": 3.405, + "args": { + "External id": 469724,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454503154.072, "dur": 13.519, + "args": { + "External id": 469725,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454503172.584, "dur": 2.836, + "args": { + "External id": 469726,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454503179.590, "dur": 15.467, + "args": { + "External id": 469727,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454503202.177, "dur": 1.069, + "args": { + "External id": 469728,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454503207.037, "dur": 11.968, + "args": { + "External id": 469729,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454503223.904, "dur": 0.995, + "args": { + "External id": 469730,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454503229.473, "dur": 11.451, + "args": { + "External id": 469731,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454503245.250, "dur": 0.974, + "args": { + "External id": 469732,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454503250.881, "dur": 10.876, + "args": { + "External id": 469733,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454503372.871, "dur": 3656.150, + "args": { + "External id": 469734,"Record function id": 0, "Ev Idx": 7909 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2338707, "tid": 2379456, + "ts": 6267454503399.746, "dur": 1270.778, + "args": { + "External id": 469735,"Record function id": 0, "Ev Idx": 7910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338707, "tid": 2379456, + "ts": 6267454503417.591, "dur": 395.314, + "args": { + "External id": 469736,"Record function id": 0, "Ev Idx": 7911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454503512.865, "dur": 4.919, + "args": { + "External id": 469737,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454503521.788, "dur": 1.054, + "args": { + "External id": 469738,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454503524.858, "dur": 1.100, + "args": { + "External id": 469739,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454503527.706, "dur": 3.342, + "args": { + "External id": 469740,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454503532.911, "dur": 0.899, + "args": { + "External id": 469741,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454503535.627, "dur": 0.537, + "args": { + "External id": 469742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454503537.818, "dur": 0.880, + "args": { + "External id": 469743,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454503540.609, "dur": 1.509, + "args": { + "External id": 469744,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454503545.758, "dur": 1.020, + "args": { + "External id": 469745,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454503548.220, "dur": 1.346, + "args": { + "External id": 469746,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454503569.758, "dur": 204.532, + "args": { + "External id": 469747,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454503587.808, "dur": 179.584, + "args": { + "External id": 469748,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454503634.705, "dur": 15.920, + "args": { + "External id": 469749,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454503655.338, "dur": 82.172, + "args": { + "External id": 469750,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454503661.257, "dur": 75.787, + "args": { + "External id": 469751,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454503666.120, "dur": 10.070, + "args": { + "External id": 469752,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454503678.141, "dur": 58.057, + "args": { + "External id": 469753,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7928 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2338707, "tid": 2379456, + "ts": 6267454503909.419, "dur": 751.796, + "args": { + "External id": 469754,"Record function id": 0, "Ev Idx": 7929 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338707, "tid": 2379456, + "ts": 6267454503929.849, "dur": 715.095, + "args": { + "External id": 469755,"Record function id": 0, "Ev Idx": 7930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454503996.446, "dur": 6.003, + "args": { + "External id": 469756,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454504020.479, "dur": 79.434, + "args": { + "External id": 469757,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504026.368, "dur": 4.144, + "args": { + "External id": 469758,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504032.799, "dur": 0.748, + "args": { + "External id": 469759,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504034.708, "dur": 0.686, + "args": { + "External id": 469760,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504038.769, "dur": 0.574, + "args": { + "External id": 469761,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504040.398, "dur": 0.655, + "args": { + "External id": 469762,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504042.425, "dur": 0.681, + "args": { + "External id": 469763,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504046.151, "dur": 2.727, + "args": { + "External id": 469764,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504050.250, "dur": 0.350, + "args": { + "External id": 469765,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504091.621, "dur": 3.268, + "args": { + "External id": 469766,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454504117.520, "dur": 55.078, + "args": { + "External id": 469767,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267454504212.081, "dur": 131.423, + "args": { + "External id": 469768,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454504225.607, "dur": 7.101, + "args": { + "External id": 469769,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267454504239.188, "dur": 11.595, + "args": { + "External id": 469770,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454504244.312, "dur": 5.973, + "args": { + "External id": 469771,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504247.998, "dur": 0.764, + "args": { + "External id": 469772,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454504259.423, "dur": 28.045, + "args": { + "External id": 469773,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504261.550, "dur": 0.601, + "args": { + "External id": 469774,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504263.523, "dur": 1.788, + "args": { + "External id": 469775,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504266.893, "dur": 0.886, + "args": { + "External id": 469776,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504268.707, "dur": 3.145, + "args": { + "External id": 469777,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504274.905, "dur": 0.438, + "args": { + "External id": 469778,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504276.449, "dur": 0.318, + "args": { + "External id": 469779,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504277.733, "dur": 0.322, + "args": { + "External id": 469780,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504281.518, "dur": 0.460, + "args": { + "External id": 469781,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454504283.330, "dur": 0.443, + "args": { + "External id": 469782,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454504299.462, "dur": 35.875, + "args": { + "External id": 469783,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454504399.242, "dur": 140.387, + "args": { + "External id": 469784,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454504435.217, "dur": 100.075, + "args": { + "External id": 469785,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7960, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454504446.198, "dur": 83.936, + "args": { + "External id": 469786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454504560.408, "dur": 2.080, + "args": { + "External id": 469787,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7962, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454504689.121, "dur": 2312.956, + "args": { + "External id": 469788,"Sequence number": 5036641, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7963 + } + }, + { + "ph": "f", "id": 402, "pid": 2338707, "tid": 2379456, "ts": 6267454504689.121, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454504818.367, "dur": 131.591, + "args": { + "External id": 469789,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454505005.123, "dur": 45.462, + "args": { + "External id": 469790,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454505117.375, "dur": 70.013, + "args": { + "External id": 469791,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454505201.214, "dur": 38.488, + "args": { + "External id": 469792,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454505250.395, "dur": 38.568, + "args": { + "External id": 469793,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454505296.133, "dur": 32.636, + "args": { + "External id": 469794,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454505336.210, "dur": 33.367, + "args": { + "External id": 469795,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454505401.970, "dur": 27.784, + "args": { + "External id": 469796,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454505456.437, "dur": 31.912, + "args": { + "External id": 469797,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454505510.068, "dur": 23.083, + "args": { + "External id": 469798,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454505552.207, "dur": 17.362, + "args": { + "External id": 469799,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454505577.953, "dur": 165.885, + "args": { + "External id": 469800,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454505750.921, "dur": 150.957, + "args": { + "External id": 469801,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454505959.009, "dur": 350.684, + "args": { + "External id": 469802,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454506097.912, "dur": 9.173, + "args": { + "External id": 469803,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454506109.625, "dur": 4.014, + "args": { + "External id": 469804,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454506114.765, "dur": 2.549, + "args": { + "External id": 469805,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454506118.172, "dur": 3.717, + "args": { + "External id": 469806,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454506180.144, "dur": 6.137, + "args": { + "External id": 469807,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454506182.350, "dur": 3.599, + "args": { + "External id": 469808,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454506188.486, "dur": 39.896, + "args": { + "External id": 469809,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454506195.219, "dur": 2.327, + "args": { + "External id": 469810,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454506230.577, "dur": 1.707, + "args": { + "External id": 469811,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454506231.480, "dur": 0.723, + "args": { + "External id": 469812,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454506235.805, "dur": 20.877, + "args": { + "External id": 469813,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454506238.390, "dur": 2.929, + "args": { + "External id": 469814,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454506356.262, "dur": 35.971, + "args": { + "External id": 469815,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454506413.236, "dur": 20.717, + "args": { + "External id": 469816,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454506442.689, "dur": 62.441, + "args": { + "External id": 469817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454506512.430, "dur": 47.746, + "args": { + "External id": 469818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454506572.504, "dur": 42.443, + "args": { + "External id": 469819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454506625.780, "dur": 40.414, + "args": { + "External id": 469820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454506674.767, "dur": 32.995, + "args": { + "External id": 469821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454506715.075, "dur": 34.753, + "args": { + "External id": 469822,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267454506777.762, "dur": 27.874, + "args": { + "External id": 469823,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454506827.992, "dur": 28.382, + "args": { + "External id": 469824,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454506875.577, "dur": 20.459, + "args": { + "External id": 469825,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454506915.591, "dur": 16.266, + "args": { + "External id": 469826,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267454506946.499, "dur": 17.102, + "args": { + "External id": 469827,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507092.603, "dur": 21.692, + "args": { + "External id": 469828,"Record function id": 0, "Ev Idx": 8003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507098.638, "dur": 14.206, + "args": { + "External id": 469829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507104.217, "dur": 7.111, + "args": { + "External id": 469830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507106.128, "dur": 4.905, + "args": { + "External id": 469831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507121.880, "dur": 5.093, + "args": { + "External id": 469832,"Record function id": 0, "Ev Idx": 8007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507123.359, "dur": 3.092, + "args": { + "External id": 469833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507124.168, "dur": 1.775, + "args": { + "External id": 469834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507125.039, "dur": 0.795, + "args": { + "External id": 469835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507130.925, "dur": 4.824, + "args": { + "External id": 469836,"Record function id": 0, "Ev Idx": 8011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507132.142, "dur": 3.130, + "args": { + "External id": 469837,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507132.803, "dur": 1.882, + "args": { + "External id": 469838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507133.452, "dur": 1.134, + "args": { + "External id": 469839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507139.614, "dur": 4.768, + "args": { + "External id": 469840,"Record function id": 0, "Ev Idx": 8015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507141.023, "dur": 2.832, + "args": { + "External id": 469841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507141.818, "dur": 1.524, + "args": { + "External id": 469842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507142.509, "dur": 0.756, + "args": { + "External id": 469843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507148.058, "dur": 6.377, + "args": { + "External id": 469844,"Record function id": 0, "Ev Idx": 8019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507149.418, "dur": 4.555, + "args": { + "External id": 469845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507150.017, "dur": 3.300, + "args": { + "External id": 469846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507150.367, "dur": 2.877, + "args": { + "External id": 469847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507158.166, "dur": 7.525, + "args": { + "External id": 469848,"Record function id": 0, "Ev Idx": 8023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507159.533, "dur": 5.688, + "args": { + "External id": 469849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507160.087, "dur": 4.535, + "args": { + "External id": 469850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507163.740, "dur": 0.804, + "args": { + "External id": 469851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507169.588, "dur": 4.407, + "args": { + "External id": 469852,"Record function id": 0, "Ev Idx": 8027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507170.849, "dur": 2.655, + "args": { + "External id": 469853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507171.562, "dur": 1.435, + "args": { + "External id": 469854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507172.027, "dur": 0.896, + "args": { + "External id": 469855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507177.701, "dur": 4.253, + "args": { + "External id": 469856,"Record function id": 0, "Ev Idx": 8031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507178.942, "dur": 2.551, + "args": { + "External id": 469857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507179.450, "dur": 1.514, + "args": { + "External id": 469858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507180.134, "dur": 0.754, + "args": { + "External id": 469859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507185.575, "dur": 4.433, + "args": { + "External id": 469860,"Record function id": 0, "Ev Idx": 8035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454507187.130, "dur": 2.380, + "args": { + "External id": 469861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507187.718, "dur": 1.324, + "args": { + "External id": 469862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454507188.117, "dur": 0.848, + "args": { + "External id": 469863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454507194.605, "dur": 73497.691, + "args": { + "External id": 469864,"Record function id": 0, "Sequence number": 5036640, "Fwd thread id": 1, "Ev Idx": 8039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454507196.031, "dur": 73485.174, + "args": { + "External id": 469865,"Sequence number": 5036640, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8040 + } + }, + { + "ph": "f", "id": 403, "pid": 2338707, "tid": 2379456, "ts": 6267454507196.031, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2338707, "tid": 2379456, + "ts": 6267454507234.874, "dur": 45.073, + "args": { + "External id": 469866,"Record function id": 0, "Ev Idx": 8041 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2338707, "tid": 2379456, + "ts": 6267454507288.748, "dur": 72.332, + "args": { + "External id": 469867,"Record function id": 0, "Ev Idx": 8042 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2338707, "tid": 2379456, + "ts": 6267454507367.562, "dur": 73303.696, + "args": { + "External id": 469868,"Record function id": 0, "Ev Idx": 8043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454507473.943, "dur": 9.129, + "args": { + "External id": 469869,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454507497.113, "dur": 5.415, + "args": { + "External id": 469870,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454507519.871, "dur": 72008.276, + "args": { + "External id": 469871,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454507535.957, "dur": 71975.075, + "args": { + "External id": 469872,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454507681.030, "dur": 29.600, + "args": { + "External id": 469873,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454507736.035, "dur": 71714.317, + "args": { + "External id": 469874,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454507739.615, "dur": 71709.621, + "args": { + "External id": 469875,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454507746.657, "dur": 18.068, + "args": { + "External id": 469876,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454507769.767, "dur": 71673.525, + "args": { + "External id": 469877,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454579672.980, "dur": 14.109, + "args": { + "External id": 469878,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454579677.013, "dur": 9.323, + "args": { + "External id": 469879,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454579723.659, "dur": 470.579, + "args": { + "External id": 469880,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454579761.615, "dur": 425.967, + "args": { + "External id": 469881,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8056, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454579776.699, "dur": 403.311, + "args": { + "External id": 469882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454580225.695, "dur": 3.118, + "args": { + "External id": 469883,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8058, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454580306.198, "dur": 10.899, + "args": { + "External id": 469884,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454580331.357, "dur": 42.891, + "args": { + "External id": 469885,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454580386.411, "dur": 1.877, + "args": { + "External id": 469886,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454580394.664, "dur": 16.087, + "args": { + "External id": 469887,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454580417.031, "dur": 1.092, + "args": { + "External id": 469888,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454580423.175, "dur": 13.025, + "args": { + "External id": 469889,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454580442.711, "dur": 1.175, + "args": { + "External id": 469890,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454580448.242, "dur": 13.424, + "args": { + "External id": 469891,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454580469.929, "dur": 1.002, + "args": { + "External id": 469892,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454580475.041, "dur": 11.773, + "args": { + "External id": 469893,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454580491.582, "dur": 1.553, + "args": { + "External id": 469894,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454580497.597, "dur": 12.700, + "args": { + "External id": 469895,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454580515.906, "dur": 1.092, + "args": { + "External id": 469896,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454580521.077, "dur": 11.368, + "args": { + "External id": 469897,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454580540.101, "dur": 1.069, + "args": { + "External id": 469898,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454580545.512, "dur": 12.687, + "args": { + "External id": 469899,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454580562.945, "dur": 3.199, + "args": { + "External id": 469900,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454580570.331, "dur": 11.892, + "args": { + "External id": 469901,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454580711.729, "dur": 3441.122, + "args": { + "External id": 469902,"Record function id": 0, "Ev Idx": 8077 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2338707, "tid": 2379456, + "ts": 6267454580737.172, "dur": 1240.455, + "args": { + "External id": 469903,"Record function id": 0, "Ev Idx": 8078 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338707, "tid": 2379456, + "ts": 6267454580757.096, "dur": 428.008, + "args": { + "External id": 469904,"Record function id": 0, "Ev Idx": 8079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454580851.561, "dur": 5.428, + "args": { + "External id": 469905,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454580861.481, "dur": 0.754, + "args": { + "External id": 469906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454580864.484, "dur": 1.052, + "args": { + "External id": 469907,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454580867.340, "dur": 1.126, + "args": { + "External id": 469908,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454580870.049, "dur": 0.957, + "args": { + "External id": 469909,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454580875.011, "dur": 1.053, + "args": { + "External id": 469910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454580877.840, "dur": 0.797, + "args": { + "External id": 469911,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454580880.017, "dur": 3.361, + "args": { + "External id": 469912,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454580885.237, "dur": 0.911, + "args": { + "External id": 469913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454580889.975, "dur": 1.119, + "args": { + "External id": 469914,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454580912.152, "dur": 231.418, + "args": { + "External id": 469915,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454580931.459, "dur": 205.687, + "args": { + "External id": 469916,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454580950.661, "dur": 16.936, + "args": { + "External id": 469917,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454580971.725, "dur": 132.248, + "args": { + "External id": 469918,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454580974.916, "dur": 128.557, + "args": { + "External id": 469919,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454580991.313, "dur": 6.606, + "args": { + "External id": 469920,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454580999.772, "dur": 101.568, + "args": { + "External id": 469921,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2338707, "tid": 2379456, + "ts": 6267454581280.708, "dur": 688.319, + "args": { + "External id": 469922,"Record function id": 0, "Ev Idx": 8097 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338707, "tid": 2379456, + "ts": 6267454581300.643, "dur": 653.494, + "args": { + "External id": 469923,"Record function id": 0, "Ev Idx": 8098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454581367.867, "dur": 7.531, + "args": { + "External id": 469924,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454581394.807, "dur": 34.851, + "args": { + "External id": 469925,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581400.811, "dur": 2.016, + "args": { + "External id": 469926,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581405.378, "dur": 1.824, + "args": { + "External id": 469927,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581408.834, "dur": 3.320, + "args": { + "External id": 469928,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581413.852, "dur": 0.650, + "args": { + "External id": 469929,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581417.242, "dur": 0.670, + "args": { + "External id": 469930,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581419.119, "dur": 0.321, + "args": { + "External id": 469931,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581420.696, "dur": 0.402, + "args": { + "External id": 469932,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581423.732, "dur": 0.416, + "args": { + "External id": 469933,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581425.464, "dur": 0.525, + "args": { + "External id": 469934,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454581442.064, "dur": 46.992, + "args": { + "External id": 469935,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267454581525.279, "dur": 153.882, + "args": { + "External id": 469936,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454581537.203, "dur": 4.371, + "args": { + "External id": 469937,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267454581547.477, "dur": 15.892, + "args": { + "External id": 469938,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454581554.787, "dur": 8.110, + "args": { + "External id": 469939,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581558.831, "dur": 2.592, + "args": { + "External id": 469940,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454581571.486, "dur": 46.317, + "args": { + "External id": 469941,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581573.121, "dur": 0.650, + "args": { + "External id": 469942,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581576.239, "dur": 0.621, + "args": { + "External id": 469943,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581578.344, "dur": 0.483, + "args": { + "External id": 469944,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581580.057, "dur": 1.905, + "args": { + "External id": 469945,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581583.150, "dur": 0.463, + "args": { + "External id": 469946,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581584.959, "dur": 0.354, + "args": { + "External id": 469947,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581588.012, "dur": 0.364, + "args": { + "External id": 469948,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581589.877, "dur": 20.077, + "args": { + "External id": 469949,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454581613.404, "dur": 0.519, + "args": { + "External id": 469950,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454581632.467, "dur": 37.774, + "args": { + "External id": 469951,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454581732.515, "dur": 139.329, + "args": { + "External id": 469952,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454581766.051, "dur": 101.452, + "args": { + "External id": 469953,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8128, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454581777.372, "dur": 84.821, + "args": { + "External id": 469954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454581891.112, "dur": 2.467, + "args": { + "External id": 469955,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8130, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454581986.203, "dur": 2138.453, + "args": { + "External id": 469956,"Sequence number": 5036639, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8131 + } + }, + { + "ph": "f", "id": 404, "pid": 2338707, "tid": 2379456, "ts": 6267454581986.203, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454582157.443, "dur": 130.942, + "args": { + "External id": 469957,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454582346.492, "dur": 47.524, + "args": { + "External id": 469958,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454582417.969, "dur": 59.633, + "args": { + "External id": 469959,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454582489.762, "dur": 37.071, + "args": { + "External id": 469960,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454582533.944, "dur": 37.838, + "args": { + "External id": 469961,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454582579.347, "dur": 50.097, + "args": { + "External id": 469962,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454582644.153, "dur": 37.631, + "args": { + "External id": 469963,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454582716.782, "dur": 27.169, + "args": { + "External id": 469964,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454582770.320, "dur": 31.877, + "args": { + "External id": 469965,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454582826.306, "dur": 21.579, + "args": { + "External id": 469966,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454582867.079, "dur": 16.519, + "args": { + "External id": 469967,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454582893.654, "dur": 43.724, + "args": { + "External id": 469968,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454582941.711, "dur": 37.727, + "args": { + "External id": 469969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454583012.186, "dur": 364.189, + "args": { + "External id": 469970,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454583151.741, "dur": 8.643, + "args": { + "External id": 469971,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454583162.627, "dur": 3.873, + "args": { + "External id": 469972,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454583167.577, "dur": 4.868, + "args": { + "External id": 469973,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454583173.613, "dur": 4.107, + "args": { + "External id": 469974,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454583241.649, "dur": 7.327, + "args": { + "External id": 469975,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454583244.153, "dur": 4.025, + "args": { + "External id": 469976,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454583251.025, "dur": 42.490, + "args": { + "External id": 469977,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454583258.741, "dur": 2.454, + "args": { + "External id": 469978,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454583295.721, "dur": 2.013, + "args": { + "External id": 469979,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454583296.743, "dur": 0.915, + "args": { + "External id": 469980,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454583298.762, "dur": 17.460, + "args": { + "External id": 469981,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454583300.692, "dur": 0.697, + "args": { + "External id": 469982,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454583427.850, "dur": 36.889, + "args": { + "External id": 469983,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454583485.912, "dur": 20.468, + "args": { + "External id": 469984,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454583515.665, "dur": 62.049, + "args": { + "External id": 469985,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454583585.292, "dur": 71.017, + "args": { + "External id": 469986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454583673.202, "dur": 29.456, + "args": { + "External id": 469987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454583708.993, "dur": 37.220, + "args": { + "External id": 469988,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454583754.810, "dur": 32.984, + "args": { + "External id": 469989,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454583794.380, "dur": 34.414, + "args": { + "External id": 469990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267454583855.754, "dur": 31.286, + "args": { + "External id": 469991,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454583909.159, "dur": 30.623, + "args": { + "External id": 469992,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454583956.773, "dur": 20.421, + "args": { + "External id": 469993,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454583996.965, "dur": 19.231, + "args": { + "External id": 469994,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267454584030.728, "dur": 17.322, + "args": { + "External id": 469995,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584180.444, "dur": 17.931, + "args": { + "External id": 469996,"Record function id": 0, "Ev Idx": 8171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584184.319, "dur": 12.902, + "args": { + "External id": 469997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584189.498, "dur": 6.440, + "args": { + "External id": 469998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584191.333, "dur": 4.507, + "args": { + "External id": 469999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584202.891, "dur": 5.209, + "args": { + "External id": 470000,"Record function id": 0, "Ev Idx": 8175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584204.493, "dur": 3.065, + "args": { + "External id": 470001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584205.288, "dur": 1.613, + "args": { + "External id": 470002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584206.106, "dur": 0.671, + "args": { + "External id": 470003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584212.010, "dur": 4.828, + "args": { + "External id": 470004,"Record function id": 0, "Ev Idx": 8179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584213.288, "dur": 3.014, + "args": { + "External id": 470005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584213.879, "dur": 1.906, + "args": { + "External id": 470006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584214.803, "dur": 0.860, + "args": { + "External id": 470007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584220.611, "dur": 4.544, + "args": { + "External id": 470008,"Record function id": 0, "Ev Idx": 8183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584222.002, "dur": 2.660, + "args": { + "External id": 470009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584222.696, "dur": 1.461, + "args": { + "External id": 470010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584223.110, "dur": 0.948, + "args": { + "External id": 470011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584228.928, "dur": 3.838, + "args": { + "External id": 470012,"Record function id": 0, "Ev Idx": 8187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584230.170, "dur": 2.111, + "args": { + "External id": 470013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584230.702, "dur": 1.110, + "args": { + "External id": 470014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584231.054, "dur": 0.678, + "args": { + "External id": 470015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584236.459, "dur": 53.977, + "args": { + "External id": 470016,"Record function id": 0, "Ev Idx": 8191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584282.241, "dur": 7.612, + "args": { + "External id": 470017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584282.997, "dur": 6.157, + "args": { + "External id": 470018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584285.868, "dur": 3.120, + "args": { + "External id": 470019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584294.443, "dur": 4.941, + "args": { + "External id": 470020,"Record function id": 0, "Ev Idx": 8195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584296.252, "dur": 2.642, + "args": { + "External id": 470021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584296.867, "dur": 1.538, + "args": { + "External id": 470022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584297.572, "dur": 0.750, + "args": { + "External id": 470023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584303.105, "dur": 4.019, + "args": { + "External id": 470024,"Record function id": 0, "Ev Idx": 8199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584304.237, "dur": 2.401, + "args": { + "External id": 470025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584304.768, "dur": 1.351, + "args": { + "External id": 470026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584305.322, "dur": 0.718, + "args": { + "External id": 470027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584310.759, "dur": 4.279, + "args": { + "External id": 470028,"Record function id": 0, "Ev Idx": 8203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454584312.114, "dur": 2.461, + "args": { + "External id": 470029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584312.726, "dur": 1.332, + "args": { + "External id": 470030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454584313.362, "dur": 0.617, + "args": { + "External id": 470031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454584319.584, "dur": 76795.065, + "args": { + "External id": 470032,"Record function id": 0, "Sequence number": 5036638, "Fwd thread id": 1, "Ev Idx": 8207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454584321.074, "dur": 76781.197, + "args": { + "External id": 470033,"Sequence number": 5036638, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8208 + } + }, + { + "ph": "f", "id": 405, "pid": 2338707, "tid": 2379456, "ts": 6267454584321.074, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2338707, "tid": 2379456, + "ts": 6267454584359.164, "dur": 46.962, + "args": { + "External id": 470034,"Record function id": 0, "Ev Idx": 8209 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2338707, "tid": 2379456, + "ts": 6267454584415.453, "dur": 73.558, + "args": { + "External id": 470035,"Record function id": 0, "Ev Idx": 8210 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2338707, "tid": 2379456, + "ts": 6267454584496.342, "dur": 76594.799, + "args": { + "External id": 470036,"Record function id": 0, "Ev Idx": 8211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454584621.831, "dur": 10.366, + "args": { + "External id": 470037,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454584645.563, "dur": 6.240, + "args": { + "External id": 470038,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454584672.176, "dur": 75166.203, + "args": { + "External id": 470039,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454584691.218, "dur": 75131.748, + "args": { + "External id": 470040,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454584796.533, "dur": 21.300, + "args": { + "External id": 470041,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454584842.268, "dur": 74925.021, + "args": { + "External id": 470042,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454584845.484, "dur": 74920.292, + "args": { + "External id": 470043,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454584851.311, "dur": 12.315, + "args": { + "External id": 470044,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454584866.208, "dur": 74893.086, + "args": { + "External id": 470045,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454659979.119, "dur": 16.076, + "args": { + "External id": 470046,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454659984.487, "dur": 10.103, + "args": { + "External id": 470047,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454660039.522, "dur": 525.274, + "args": { + "External id": 470048,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454660111.745, "dur": 445.782, + "args": { + "External id": 470049,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8224, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454660129.139, "dur": 421.059, + "args": { + "External id": 470050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454660612.547, "dur": 4.139, + "args": { + "External id": 470051,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8226, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454660700.630, "dur": 10.823, + "args": { + "External id": 470052,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454660726.356, "dur": 43.851, + "args": { + "External id": 470053,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454660782.560, "dur": 1.887, + "args": { + "External id": 470054,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454660790.867, "dur": 17.184, + "args": { + "External id": 470055,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454660814.181, "dur": 0.949, + "args": { + "External id": 470056,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454660819.745, "dur": 13.590, + "args": { + "External id": 470057,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454660838.914, "dur": 1.073, + "args": { + "External id": 470058,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454660844.507, "dur": 14.278, + "args": { + "External id": 470059,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454660863.479, "dur": 1.014, + "args": { + "External id": 470060,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454660868.961, "dur": 13.126, + "args": { + "External id": 470061,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454660886.920, "dur": 1.849, + "args": { + "External id": 470062,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454660895.778, "dur": 14.458, + "args": { + "External id": 470063,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454660916.092, "dur": 1.362, + "args": { + "External id": 470064,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454660922.427, "dur": 12.630, + "args": { + "External id": 470065,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454660939.919, "dur": 0.926, + "args": { + "External id": 470066,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454660945.526, "dur": 12.198, + "args": { + "External id": 470067,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454660964.871, "dur": 3.076, + "args": { + "External id": 470068,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454660972.527, "dur": 11.288, + "args": { + "External id": 470069,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454661134.836, "dur": 3481.684, + "args": { + "External id": 470070,"Record function id": 0, "Ev Idx": 8245 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2338707, "tid": 2379456, + "ts": 6267454661161.309, "dur": 1257.996, + "args": { + "External id": 470071,"Record function id": 0, "Ev Idx": 8246 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338707, "tid": 2379456, + "ts": 6267454661182.150, "dur": 390.924, + "args": { + "External id": 470072,"Record function id": 0, "Ev Idx": 8247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454661281.438, "dur": 5.849, + "args": { + "External id": 470073,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454661292.110, "dur": 0.709, + "args": { + "External id": 470074,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454661295.110, "dur": 1.362, + "args": { + "External id": 470075,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454661298.531, "dur": 0.975, + "args": { + "External id": 470076,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454661301.232, "dur": 1.001, + "args": { + "External id": 470077,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454661304.251, "dur": 1.056, + "args": { + "External id": 470078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454661306.947, "dur": 0.788, + "args": { + "External id": 470079,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454661313.011, "dur": 2.977, + "args": { + "External id": 470080,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454661318.288, "dur": 0.963, + "args": { + "External id": 470081,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454661320.900, "dur": 1.026, + "args": { + "External id": 470082,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454661342.839, "dur": 194.079, + "args": { + "External id": 470083,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454661363.448, "dur": 167.374, + "args": { + "External id": 470084,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454661386.754, "dur": 16.792, + "args": { + "External id": 470085,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454661407.822, "dur": 92.091, + "args": { + "External id": 470086,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454661411.038, "dur": 88.393, + "args": { + "External id": 470087,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661427.385, "dur": 6.830, + "args": { + "External id": 470088,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454661436.088, "dur": 62.501, + "args": { + "External id": 470089,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8264 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2338707, "tid": 2379456, + "ts": 6267454661689.161, "dur": 721.562, + "args": { + "External id": 470090,"Record function id": 0, "Ev Idx": 8265 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338707, "tid": 2379456, + "ts": 6267454661709.820, "dur": 685.243, + "args": { + "External id": 470091,"Record function id": 0, "Ev Idx": 8266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454661777.643, "dur": 6.994, + "args": { + "External id": 470092,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454661804.510, "dur": 32.740, + "args": { + "External id": 470093,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661809.936, "dur": 1.796, + "args": { + "External id": 470094,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661814.530, "dur": 1.420, + "args": { + "External id": 470095,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661817.164, "dur": 2.987, + "args": { + "External id": 470096,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661821.663, "dur": 0.520, + "args": { + "External id": 470097,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661824.761, "dur": 0.539, + "args": { + "External id": 470098,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661826.638, "dur": 0.472, + "args": { + "External id": 470099,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661828.255, "dur": 0.588, + "args": { + "External id": 470100,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661831.195, "dur": 0.632, + "args": { + "External id": 470101,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661833.011, "dur": 0.284, + "args": { + "External id": 470102,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454661849.718, "dur": 51.682, + "args": { + "External id": 470103,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267454661936.254, "dur": 173.099, + "args": { + "External id": 470104,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454661948.010, "dur": 4.045, + "args": { + "External id": 470105,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267454661957.985, "dur": 13.805, + "args": { + "External id": 470106,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454661963.584, "dur": 7.681, + "args": { + "External id": 470107,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661967.252, "dur": 2.600, + "args": { + "External id": 470108,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454661984.637, "dur": 23.720, + "args": { + "External id": 470109,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661986.357, "dur": 0.649, + "args": { + "External id": 470110,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661989.087, "dur": 0.690, + "args": { + "External id": 470111,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661990.917, "dur": 0.328, + "args": { + "External id": 470112,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661992.248, "dur": 0.886, + "args": { + "External id": 470113,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661994.817, "dur": 0.642, + "args": { + "External id": 470114,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661996.767, "dur": 0.630, + "args": { + "External id": 470115,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454661998.946, "dur": 0.520, + "args": { + "External id": 470116,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454662000.684, "dur": 2.516, + "args": { + "External id": 470117,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454662004.450, "dur": 0.260, + "args": { + "External id": 470118,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454662021.482, "dur": 76.662, + "args": { + "External id": 470119,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454662167.023, "dur": 144.154, + "args": { + "External id": 470120,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454662204.969, "dur": 101.953, + "args": { + "External id": 470121,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8296, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454662217.497, "dur": 84.486, + "args": { + "External id": 470122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454662332.207, "dur": 2.233, + "args": { + "External id": 470123,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8298, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454662427.975, "dur": 2148.359, + "args": { + "External id": 470124,"Sequence number": 5036637, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8299 + } + }, + { + "ph": "f", "id": 406, "pid": 2338707, "tid": 2379456, "ts": 6267454662427.975, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454662553.283, "dur": 150.783, + "args": { + "External id": 470125,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454662759.966, "dur": 46.033, + "args": { + "External id": 470126,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454662830.136, "dur": 61.326, + "args": { + "External id": 470127,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454662903.797, "dur": 36.877, + "args": { + "External id": 470128,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454662948.351, "dur": 36.738, + "args": { + "External id": 470129,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454662992.829, "dur": 32.248, + "args": { + "External id": 470130,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454663035.620, "dur": 76.352, + "args": { + "External id": 470131,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454663151.400, "dur": 28.389, + "args": { + "External id": 470132,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454663204.477, "dur": 35.588, + "args": { + "External id": 470133,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454663264.812, "dur": 25.306, + "args": { + "External id": 470134,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454663306.731, "dur": 20.187, + "args": { + "External id": 470135,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454663339.961, "dur": 51.456, + "args": { + "External id": 470136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454663396.327, "dur": 39.009, + "args": { + "External id": 470137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454663468.964, "dur": 358.851, + "args": { + "External id": 470138,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454663563.588, "dur": 7.767, + "args": { + "External id": 470139,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454663574.188, "dur": 3.453, + "args": { + "External id": 470140,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454663579.084, "dur": 35.273, + "args": { + "External id": 470141,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454663623.162, "dur": 5.375, + "args": { + "External id": 470142,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454663689.872, "dur": 6.692, + "args": { + "External id": 470143,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454663692.642, "dur": 3.705, + "args": { + "External id": 470144,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454663698.564, "dur": 41.926, + "args": { + "External id": 470145,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454663705.734, "dur": 2.045, + "args": { + "External id": 470146,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454663741.962, "dur": 2.813, + "args": { + "External id": 470147,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454663743.562, "dur": 1.123, + "args": { + "External id": 470148,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454663746.392, "dur": 17.341, + "args": { + "External id": 470149,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454663748.572, "dur": 0.633, + "args": { + "External id": 470150,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454663872.709, "dur": 36.885, + "args": { + "External id": 470151,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454663938.218, "dur": 21.823, + "args": { + "External id": 470152,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454663970.960, "dur": 62.607, + "args": { + "External id": 470153,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454664042.565, "dur": 94.847, + "args": { + "External id": 470154,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454664156.148, "dur": 32.796, + "args": { + "External id": 470155,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454664197.438, "dur": 38.312, + "args": { + "External id": 470156,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454664245.488, "dur": 35.064, + "args": { + "External id": 470157,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454664289.321, "dur": 35.257, + "args": { + "External id": 470158,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267454664347.850, "dur": 33.517, + "args": { + "External id": 470159,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454664398.972, "dur": 31.099, + "args": { + "External id": 470160,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454664446.324, "dur": 22.107, + "args": { + "External id": 470161,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454664483.900, "dur": 18.808, + "args": { + "External id": 470162,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267454664517.136, "dur": 19.149, + "args": { + "External id": 470163,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664643.933, "dur": 19.410, + "args": { + "External id": 470164,"Record function id": 0, "Ev Idx": 8339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664648.028, "dur": 14.144, + "args": { + "External id": 470165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664653.003, "dur": 7.553, + "args": { + "External id": 470166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664655.244, "dur": 5.155, + "args": { + "External id": 470167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664668.108, "dur": 6.370, + "args": { + "External id": 470168,"Record function id": 0, "Ev Idx": 8343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664669.858, "dur": 4.091, + "args": { + "External id": 470169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664670.900, "dur": 2.398, + "args": { + "External id": 470170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664671.780, "dur": 1.381, + "args": { + "External id": 470171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664678.431, "dur": 4.867, + "args": { + "External id": 470172,"Record function id": 0, "Ev Idx": 8347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664679.995, "dur": 2.814, + "args": { + "External id": 470173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664680.605, "dur": 1.653, + "args": { + "External id": 470174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664680.998, "dur": 1.184, + "args": { + "External id": 470175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664687.067, "dur": 4.243, + "args": { + "External id": 470176,"Record function id": 0, "Ev Idx": 8351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664688.225, "dur": 2.607, + "args": { + "External id": 470177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664688.867, "dur": 1.465, + "args": { + "External id": 470178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664689.392, "dur": 0.867, + "args": { + "External id": 470179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664695.092, "dur": 6.616, + "args": { + "External id": 470180,"Record function id": 0, "Ev Idx": 8355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664696.419, "dur": 4.824, + "args": { + "External id": 470181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664696.973, "dur": 3.765, + "args": { + "External id": 470182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664697.292, "dur": 3.340, + "args": { + "External id": 470183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664705.598, "dur": 5.660, + "args": { + "External id": 470184,"Record function id": 0, "Ev Idx": 8359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664707.005, "dur": 3.693, + "args": { + "External id": 470185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664707.695, "dur": 2.479, + "args": { + "External id": 470186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664708.889, "dur": 1.170, + "args": { + "External id": 470187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664715.132, "dur": 4.335, + "args": { + "External id": 470188,"Record function id": 0, "Ev Idx": 8363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664716.281, "dur": 2.697, + "args": { + "External id": 470189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664717.199, "dur": 1.265, + "args": { + "External id": 470190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664717.620, "dur": 0.738, + "args": { + "External id": 470191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664723.085, "dur": 4.429, + "args": { + "External id": 470192,"Record function id": 0, "Ev Idx": 8367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664724.468, "dur": 2.579, + "args": { + "External id": 470193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664725.074, "dur": 1.430, + "args": { + "External id": 470194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664725.537, "dur": 0.853, + "args": { + "External id": 470195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664731.879, "dur": 4.250, + "args": { + "External id": 470196,"Record function id": 0, "Ev Idx": 8371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454664733.065, "dur": 2.601, + "args": { + "External id": 470197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664733.601, "dur": 1.587, + "args": { + "External id": 470198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454664734.282, "dur": 0.769, + "args": { + "External id": 470199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454664741.046, "dur": 68383.928, + "args": { + "External id": 470200,"Record function id": 0, "Sequence number": 5036636, "Fwd thread id": 1, "Ev Idx": 8375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454664742.515, "dur": 68369.541, + "args": { + "External id": 470201,"Sequence number": 5036636, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8376 + } + }, + { + "ph": "f", "id": 407, "pid": 2338707, "tid": 2379456, "ts": 6267454664742.515, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2338707, "tid": 2379456, + "ts": 6267454664780.159, "dur": 46.804, + "args": { + "External id": 470202,"Record function id": 0, "Ev Idx": 8377 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2338707, "tid": 2379456, + "ts": 6267454664835.951, "dur": 78.468, + "args": { + "External id": 470203,"Record function id": 0, "Ev Idx": 8378 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2338707, "tid": 2379456, + "ts": 6267454664921.452, "dur": 68178.553, + "args": { + "External id": 470204,"Record function id": 0, "Ev Idx": 8379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454665029.312, "dur": 8.007, + "args": { + "External id": 470205,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454665049.105, "dur": 5.661, + "args": { + "External id": 470206,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454665120.878, "dur": 66744.725, + "args": { + "External id": 470207,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454665140.100, "dur": 66708.750, + "args": { + "External id": 470208,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454665253.638, "dur": 22.012, + "args": { + "External id": 470209,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454665299.590, "dur": 66492.106, + "args": { + "External id": 470210,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454665303.134, "dur": 66486.915, + "args": { + "External id": 470211,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454665322.114, "dur": 15.843, + "args": { + "External id": 470212,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454665340.896, "dur": 66443.385, + "args": { + "External id": 470213,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454732008.429, "dur": 15.968, + "args": { + "External id": 470214,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454732013.358, "dur": 10.553, + "args": { + "External id": 470215,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454732101.138, "dur": 456.253, + "args": { + "External id": 470216,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454732146.329, "dur": 403.771, + "args": { + "External id": 470217,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8392, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454732164.524, "dur": 375.790, + "args": { + "External id": 470218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454732586.011, "dur": 3.713, + "args": { + "External id": 470219,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8394, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454732691.592, "dur": 11.044, + "args": { + "External id": 470220,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454732718.584, "dur": 47.808, + "args": { + "External id": 470221,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454732778.439, "dur": 2.884, + "args": { + "External id": 470222,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454732788.188, "dur": 16.720, + "args": { + "External id": 470223,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454732812.107, "dur": 0.986, + "args": { + "External id": 470224,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454732819.649, "dur": 14.238, + "args": { + "External id": 470225,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454732840.011, "dur": 1.033, + "args": { + "External id": 470226,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454732846.778, "dur": 13.829, + "args": { + "External id": 470227,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454732865.819, "dur": 1.031, + "args": { + "External id": 470228,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454732870.963, "dur": 13.356, + "args": { + "External id": 470229,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454732888.889, "dur": 1.296, + "args": { + "External id": 470230,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454732894.705, "dur": 14.630, + "args": { + "External id": 470231,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454732914.453, "dur": 1.449, + "args": { + "External id": 470232,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454732920.913, "dur": 13.993, + "args": { + "External id": 470233,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454732942.239, "dur": 1.059, + "args": { + "External id": 470234,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454732948.713, "dur": 15.317, + "args": { + "External id": 470235,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454732968.599, "dur": 3.437, + "args": { + "External id": 470236,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454732976.353, "dur": 13.876, + "args": { + "External id": 470237,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454733149.195, "dur": 3437.701, + "args": { + "External id": 470238,"Record function id": 0, "Ev Idx": 8413 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2338707, "tid": 2379456, + "ts": 6267454733175.907, "dur": 1251.057, + "args": { + "External id": 470239,"Record function id": 0, "Ev Idx": 8414 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338707, "tid": 2379456, + "ts": 6267454733195.223, "dur": 415.340, + "args": { + "External id": 470240,"Record function id": 0, "Ev Idx": 8415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454733295.051, "dur": 5.713, + "args": { + "External id": 470241,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454733305.206, "dur": 1.582, + "args": { + "External id": 470242,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454733308.967, "dur": 1.329, + "args": { + "External id": 470243,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454733311.885, "dur": 1.143, + "args": { + "External id": 470244,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454733314.883, "dur": 0.824, + "args": { + "External id": 470245,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454733317.653, "dur": 0.932, + "args": { + "External id": 470246,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454733322.369, "dur": 1.001, + "args": { + "External id": 470247,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454733324.895, "dur": 3.959, + "args": { + "External id": 470248,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454733331.041, "dur": 0.766, + "args": { + "External id": 470249,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454733333.370, "dur": 0.658, + "args": { + "External id": 470250,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454733357.477, "dur": 193.059, + "args": { + "External id": 470251,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454733377.908, "dur": 166.691, + "args": { + "External id": 470252,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454733409.238, "dur": 18.399, + "args": { + "External id": 470253,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454733432.292, "dur": 79.971, + "args": { + "External id": 470254,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454733435.664, "dur": 76.126, + "args": { + "External id": 470255,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454733440.951, "dur": 6.530, + "args": { + "External id": 470256,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454733449.849, "dur": 61.048, + "args": { + "External id": 470257,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8432 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2338707, "tid": 2379456, + "ts": 6267454733710.718, "dur": 707.050, + "args": { + "External id": 470258,"Record function id": 0, "Ev Idx": 8433 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338707, "tid": 2379456, + "ts": 6267454733731.766, "dur": 671.001, + "args": { + "External id": 470259,"Record function id": 0, "Ev Idx": 8434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454733801.093, "dur": 7.347, + "args": { + "External id": 470260,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454733826.478, "dur": 34.701, + "args": { + "External id": 470261,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454733832.689, "dur": 1.807, + "args": { + "External id": 470262,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454733837.398, "dur": 0.555, + "args": { + "External id": 470263,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454733839.336, "dur": 3.358, + "args": { + "External id": 470264,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454733844.688, "dur": 0.740, + "args": { + "External id": 470265,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454733847.483, "dur": 0.392, + "args": { + "External id": 470266,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454733849.185, "dur": 0.704, + "args": { + "External id": 470267,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454733851.955, "dur": 0.835, + "args": { + "External id": 470268,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454733854.426, "dur": 0.900, + "args": { + "External id": 470269,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454733856.565, "dur": 0.433, + "args": { + "External id": 470270,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454733874.031, "dur": 49.567, + "args": { + "External id": 470271,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267454733958.518, "dur": 166.791, + "args": { + "External id": 470272,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454733970.661, "dur": 3.577, + "args": { + "External id": 470273,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267454733980.403, "dur": 15.017, + "args": { + "External id": 470274,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454733985.602, "dur": 9.354, + "args": { + "External id": 470275,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454733989.849, "dur": 3.415, + "args": { + "External id": 470276,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454734003.186, "dur": 25.662, + "args": { + "External id": 470277,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454734005.354, "dur": 0.659, + "args": { + "External id": 470278,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454734007.116, "dur": 0.660, + "args": { + "External id": 470279,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454734009.779, "dur": 0.531, + "args": { + "External id": 470280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454734012.235, "dur": 0.349, + "args": { + "External id": 470281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454734013.816, "dur": 0.929, + "args": { + "External id": 470282,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454734016.525, "dur": 0.397, + "args": { + "External id": 470283,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454734018.810, "dur": 0.443, + "args": { + "External id": 470284,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454734020.378, "dur": 2.996, + "args": { + "External id": 470285,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454734025.067, "dur": 0.542, + "args": { + "External id": 470286,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454734041.129, "dur": 72.527, + "args": { + "External id": 470287,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454734182.377, "dur": 136.387, + "args": { + "External id": 470288,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454734211.559, "dur": 103.144, + "args": { + "External id": 470289,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8464, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454734223.665, "dur": 86.303, + "args": { + "External id": 470290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454734339.107, "dur": 2.326, + "args": { + "External id": 470291,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8466, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454734435.714, "dur": 2126.710, + "args": { + "External id": 470292,"Sequence number": 5036635, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8467 + } + }, + { + "ph": "f", "id": 408, "pid": 2338707, "tid": 2379456, "ts": 6267454734435.714, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454734562.345, "dur": 150.669, + "args": { + "External id": 470293,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454734768.554, "dur": 48.530, + "args": { + "External id": 470294,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454734841.125, "dur": 62.697, + "args": { + "External id": 470295,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454734916.098, "dur": 38.928, + "args": { + "External id": 470296,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454734962.419, "dur": 37.769, + "args": { + "External id": 470297,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454735007.699, "dur": 33.426, + "args": { + "External id": 470298,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454735051.271, "dur": 79.471, + "args": { + "External id": 470299,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454735169.839, "dur": 29.660, + "args": { + "External id": 470300,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454735224.766, "dur": 32.822, + "args": { + "External id": 470301,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454735285.771, "dur": 23.524, + "args": { + "External id": 470302,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454735326.494, "dur": 17.505, + "args": { + "External id": 470303,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454735357.257, "dur": 47.803, + "args": { + "External id": 470304,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454735409.448, "dur": 56.739, + "args": { + "External id": 470305,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454735509.593, "dur": 327.732, + "args": { + "External id": 470306,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454735635.867, "dur": 8.612, + "args": { + "External id": 470307,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454735647.318, "dur": 3.711, + "args": { + "External id": 470308,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454735652.519, "dur": 5.131, + "args": { + "External id": 470309,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454735658.944, "dur": 3.247, + "args": { + "External id": 470310,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454735716.126, "dur": 6.337, + "args": { + "External id": 470311,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454735718.714, "dur": 3.480, + "args": { + "External id": 470312,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454735724.570, "dur": 39.442, + "args": { + "External id": 470313,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454735731.458, "dur": 2.230, + "args": { + "External id": 470314,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454735766.004, "dur": 2.351, + "args": { + "External id": 470315,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454735767.451, "dur": 0.823, + "args": { + "External id": 470316,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454735769.991, "dur": 16.752, + "args": { + "External id": 470317,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454735772.821, "dur": 0.763, + "args": { + "External id": 470318,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454735879.277, "dur": 33.720, + "args": { + "External id": 470319,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454735933.070, "dur": 20.179, + "args": { + "External id": 470320,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454735963.654, "dur": 60.494, + "args": { + "External id": 470321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454736033.201, "dur": 96.461, + "args": { + "External id": 470322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454736149.463, "dur": 32.707, + "args": { + "External id": 470323,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454736190.669, "dur": 38.233, + "args": { + "External id": 470324,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454736238.451, "dur": 32.748, + "args": { + "External id": 470325,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454736279.335, "dur": 38.745, + "args": { + "External id": 470326,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267454736341.700, "dur": 33.909, + "args": { + "External id": 470327,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454736394.900, "dur": 28.157, + "args": { + "External id": 470328,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454736438.383, "dur": 20.750, + "args": { + "External id": 470329,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454736474.739, "dur": 16.737, + "args": { + "External id": 470330,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267454736505.401, "dur": 18.083, + "args": { + "External id": 470331,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736631.254, "dur": 19.696, + "args": { + "External id": 470332,"Record function id": 0, "Ev Idx": 8507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736635.565, "dur": 13.991, + "args": { + "External id": 470333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736640.503, "dur": 7.468, + "args": { + "External id": 470334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736642.581, "dur": 5.100, + "args": { + "External id": 470335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736655.728, "dur": 5.907, + "args": { + "External id": 470336,"Record function id": 0, "Ev Idx": 8511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736657.285, "dur": 3.873, + "args": { + "External id": 470337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736658.156, "dur": 2.295, + "args": { + "External id": 470338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736659.070, "dur": 1.292, + "args": { + "External id": 470339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736665.440, "dur": 5.149, + "args": { + "External id": 470340,"Record function id": 0, "Ev Idx": 8515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736666.970, "dur": 3.154, + "args": { + "External id": 470341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736667.567, "dur": 2.049, + "args": { + "External id": 470342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736668.277, "dur": 1.267, + "args": { + "External id": 470343,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736674.317, "dur": 4.111, + "args": { + "External id": 470344,"Record function id": 0, "Ev Idx": 8519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736675.540, "dur": 2.427, + "args": { + "External id": 470345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736676.230, "dur": 1.254, + "args": { + "External id": 470346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736676.630, "dur": 0.780, + "args": { + "External id": 470347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736682.128, "dur": 6.765, + "args": { + "External id": 470348,"Record function id": 0, "Ev Idx": 8523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736683.670, "dur": 4.758, + "args": { + "External id": 470349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736684.362, "dur": 3.573, + "args": { + "External id": 470350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736684.683, "dur": 3.179, + "args": { + "External id": 470351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736692.597, "dur": 5.240, + "args": { + "External id": 470352,"Record function id": 0, "Ev Idx": 8527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736694.323, "dur": 2.992, + "args": { + "External id": 470353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736694.875, "dur": 1.948, + "args": { + "External id": 470354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736695.710, "dur": 0.973, + "args": { + "External id": 470355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736701.846, "dur": 4.274, + "args": { + "External id": 470356,"Record function id": 0, "Ev Idx": 8531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736703.178, "dur": 2.448, + "args": { + "External id": 470357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736703.809, "dur": 1.324, + "args": { + "External id": 470358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736704.316, "dur": 0.741, + "args": { + "External id": 470359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736709.870, "dur": 17.020, + "args": { + "External id": 470360,"Record function id": 0, "Ev Idx": 8535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736723.467, "dur": 2.843, + "args": { + "External id": 470361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736724.222, "dur": 1.582, + "args": { + "External id": 470362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736724.699, "dur": 0.998, + "args": { + "External id": 470363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736730.887, "dur": 6.842, + "args": { + "External id": 470364,"Record function id": 0, "Ev Idx": 8539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454736732.310, "dur": 4.847, + "args": { + "External id": 470365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736733.086, "dur": 3.525, + "args": { + "External id": 470366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454736735.679, "dur": 0.796, + "args": { + "External id": 470367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454736742.579, "dur": 62301.179, + "args": { + "External id": 470368,"Record function id": 0, "Sequence number": 5036634, "Fwd thread id": 1, "Ev Idx": 8543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454736750.746, "dur": 62282.534, + "args": { + "External id": 470369,"Sequence number": 5036634, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8544 + } + }, + { + "ph": "f", "id": 409, "pid": 2338707, "tid": 2379456, "ts": 6267454736750.746, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2338707, "tid": 2379456, + "ts": 6267454736788.107, "dur": 50.033, + "args": { + "External id": 470370,"Record function id": 0, "Ev Idx": 8545 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2338707, "tid": 2379456, + "ts": 6267454736847.024, "dur": 73.066, + "args": { + "External id": 470371,"Record function id": 0, "Ev Idx": 8546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2338707, "tid": 2379456, + "ts": 6267454736927.153, "dur": 62096.992, + "args": { + "External id": 470372,"Record function id": 0, "Ev Idx": 8547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454737032.200, "dur": 8.821, + "args": { + "External id": 470373,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454737053.085, "dur": 44.380, + "args": { + "External id": 470374,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454737118.765, "dur": 60761.908, + "args": { + "External id": 470375,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454737145.758, "dur": 60719.289, + "args": { + "External id": 470376,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454737264.526, "dur": 23.678, + "args": { + "External id": 470377,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454737312.025, "dur": 60501.533, + "args": { + "External id": 470378,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454737315.830, "dur": 60496.348, + "args": { + "External id": 470379,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454737321.266, "dur": 13.271, + "args": { + "External id": 470380,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454737337.225, "dur": 60469.711, + "args": { + "External id": 470381,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454798009.498, "dur": 14.943, + "args": { + "External id": 470382,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454798014.761, "dur": 9.218, + "args": { + "External id": 470383,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454798097.590, "dur": 452.614, + "args": { + "External id": 470384,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454798139.904, "dur": 404.606, + "args": { + "External id": 470385,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8560, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454798157.799, "dur": 379.200, + "args": { + "External id": 470386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454798574.267, "dur": 2.436, + "args": { + "External id": 470387,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8562, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454798676.566, "dur": 11.228, + "args": { + "External id": 470388,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454798702.295, "dur": 45.172, + "args": { + "External id": 470389,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454798760.417, "dur": 1.990, + "args": { + "External id": 470390,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454798768.791, "dur": 15.331, + "args": { + "External id": 470391,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454798790.167, "dur": 1.204, + "args": { + "External id": 470392,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454798796.592, "dur": 12.396, + "args": { + "External id": 470393,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454798814.550, "dur": 1.390, + "args": { + "External id": 470394,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454798819.878, "dur": 12.396, + "args": { + "External id": 470395,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454798837.127, "dur": 1.184, + "args": { + "External id": 470396,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454798842.849, "dur": 11.166, + "args": { + "External id": 470397,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454798858.674, "dur": 1.333, + "args": { + "External id": 470398,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454798865.162, "dur": 12.769, + "args": { + "External id": 470399,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454798885.239, "dur": 1.069, + "args": { + "External id": 470400,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454798891.387, "dur": 12.719, + "args": { + "External id": 470401,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454798908.708, "dur": 1.020, + "args": { + "External id": 470402,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454798913.816, "dur": 14.082, + "args": { + "External id": 470403,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454798932.899, "dur": 3.683, + "args": { + "External id": 470404,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454798941.843, "dur": 12.245, + "args": { + "External id": 470405,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454799101.295, "dur": 3425.876, + "args": { + "External id": 470406,"Record function id": 0, "Ev Idx": 8581 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2338707, "tid": 2379456, + "ts": 6267454799131.301, "dur": 1256.759, + "args": { + "External id": 470407,"Record function id": 0, "Ev Idx": 8582 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338707, "tid": 2379456, + "ts": 6267454799149.483, "dur": 394.164, + "args": { + "External id": 470408,"Record function id": 0, "Ev Idx": 8583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454799248.139, "dur": 5.849, + "args": { + "External id": 470409,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454799258.397, "dur": 1.249, + "args": { + "External id": 470410,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454799262.144, "dur": 1.295, + "args": { + "External id": 470411,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454799265.742, "dur": 1.262, + "args": { + "External id": 470412,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454799268.442, "dur": 0.831, + "args": { + "External id": 470413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454799271.397, "dur": 1.041, + "args": { + "External id": 470414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454799274.210, "dur": 1.086, + "args": { + "External id": 470415,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454799277.240, "dur": 3.761, + "args": { + "External id": 470416,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454799285.645, "dur": 1.536, + "args": { + "External id": 470417,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454799288.963, "dur": 1.050, + "args": { + "External id": 470418,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454799310.816, "dur": 191.860, + "args": { + "External id": 470419,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454799330.417, "dur": 166.356, + "args": { + "External id": 470420,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454799362.708, "dur": 17.143, + "args": { + "External id": 470421,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454799384.556, "dur": 78.086, + "args": { + "External id": 470422,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454799387.777, "dur": 74.436, + "args": { + "External id": 470423,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799392.733, "dur": 6.584, + "args": { + "External id": 470424,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454799401.539, "dur": 59.814, + "args": { + "External id": 470425,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8600 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2338707, "tid": 2379456, + "ts": 6267454799660.280, "dur": 717.687, + "args": { + "External id": 470426,"Record function id": 0, "Ev Idx": 8601 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338707, "tid": 2379456, + "ts": 6267454799681.874, "dur": 679.891, + "args": { + "External id": 470427,"Record function id": 0, "Ev Idx": 8602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454799751.249, "dur": 7.294, + "args": { + "External id": 470428,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454799777.088, "dur": 34.996, + "args": { + "External id": 470429,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799783.350, "dur": 2.090, + "args": { + "External id": 470430,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799788.282, "dur": 0.805, + "args": { + "External id": 470431,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799790.785, "dur": 3.078, + "args": { + "External id": 470432,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799795.750, "dur": 0.886, + "args": { + "External id": 470433,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799798.518, "dur": 0.566, + "args": { + "External id": 470434,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799800.385, "dur": 0.636, + "args": { + "External id": 470435,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799802.607, "dur": 0.871, + "args": { + "External id": 470436,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799805.588, "dur": 0.395, + "args": { + "External id": 470437,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799807.258, "dur": 0.600, + "args": { + "External id": 470438,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454799824.298, "dur": 51.526, + "args": { + "External id": 470439,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267454799911.267, "dur": 121.569, + "args": { + "External id": 470440,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454799923.958, "dur": 3.500, + "args": { + "External id": 470441,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267454799933.144, "dur": 14.385, + "args": { + "External id": 470442,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454799938.407, "dur": 8.661, + "args": { + "External id": 470443,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799942.422, "dur": 3.086, + "args": { + "External id": 470444,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454799955.554, "dur": 24.974, + "args": { + "External id": 470445,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799957.660, "dur": 0.632, + "args": { + "External id": 470446,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799959.828, "dur": 0.791, + "args": { + "External id": 470447,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799962.587, "dur": 0.511, + "args": { + "External id": 470448,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799964.678, "dur": 0.541, + "args": { + "External id": 470449,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799966.466, "dur": 0.863, + "args": { + "External id": 470450,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799968.916, "dur": 0.565, + "args": { + "External id": 470451,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799971.129, "dur": 0.354, + "args": { + "External id": 470452,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799972.629, "dur": 2.575, + "args": { + "External id": 470453,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454799977.095, "dur": 0.527, + "args": { + "External id": 470454,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454799991.733, "dur": 32.375, + "args": { + "External id": 470455,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454800132.075, "dur": 141.403, + "args": { + "External id": 470456,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454800161.789, "dur": 107.454, + "args": { + "External id": 470457,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8632, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454800173.867, "dur": 88.671, + "args": { + "External id": 470458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454800294.208, "dur": 2.266, + "args": { + "External id": 470459,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8634, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454800396.539, "dur": 2107.176, + "args": { + "External id": 470460,"Sequence number": 5036633, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8635 + } + }, + { + "ph": "f", "id": 410, "pid": 2338707, "tid": 2379456, "ts": 6267454800396.539, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454800522.650, "dur": 148.282, + "args": { + "External id": 470461,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454800723.778, "dur": 46.814, + "args": { + "External id": 470462,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454800794.359, "dur": 63.229, + "args": { + "External id": 470463,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454800869.428, "dur": 38.516, + "args": { + "External id": 470464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454800915.217, "dur": 38.506, + "args": { + "External id": 470465,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454800962.478, "dur": 38.466, + "args": { + "External id": 470466,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454801011.176, "dur": 34.284, + "args": { + "External id": 470467,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454801118.970, "dur": 30.124, + "args": { + "External id": 470468,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454801177.088, "dur": 32.521, + "args": { + "External id": 470469,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454801239.785, "dur": 23.749, + "args": { + "External id": 470470,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454801280.494, "dur": 17.529, + "args": { + "External id": 470471,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454801311.393, "dur": 51.451, + "args": { + "External id": 470472,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454801367.974, "dur": 38.666, + "args": { + "External id": 470473,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454801440.203, "dur": 348.966, + "args": { + "External id": 470474,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454801552.708, "dur": 8.906, + "args": { + "External id": 470475,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454801564.413, "dur": 3.790, + "args": { + "External id": 470476,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454801569.671, "dur": 4.755, + "args": { + "External id": 470477,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454801575.955, "dur": 2.681, + "args": { + "External id": 470478,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454801653.212, "dur": 6.555, + "args": { + "External id": 470479,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454801655.621, "dur": 3.717, + "args": { + "External id": 470480,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454801661.764, "dur": 44.975, + "args": { + "External id": 470481,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454801669.484, "dur": 2.049, + "args": { + "External id": 470482,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454801708.575, "dur": 2.030, + "args": { + "External id": 470483,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454801709.780, "dur": 0.718, + "args": { + "External id": 470484,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454801711.931, "dur": 18.575, + "args": { + "External id": 470485,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454801714.595, "dur": 0.659, + "args": { + "External id": 470486,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454801832.390, "dur": 33.779, + "args": { + "External id": 470487,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454801886.751, "dur": 19.190, + "args": { + "External id": 470488,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454801916.121, "dur": 56.556, + "args": { + "External id": 470489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454801981.734, "dur": 47.105, + "args": { + "External id": 470490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454802042.014, "dur": 67.343, + "args": { + "External id": 470491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454802121.807, "dur": 42.401, + "args": { + "External id": 470492,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454802174.078, "dur": 35.440, + "args": { + "External id": 470493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454802218.813, "dur": 35.430, + "args": { + "External id": 470494,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267454802276.966, "dur": 30.905, + "args": { + "External id": 470495,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454802326.306, "dur": 30.495, + "args": { + "External id": 470496,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454802372.617, "dur": 22.456, + "args": { + "External id": 470497,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454802410.375, "dur": 18.676, + "args": { + "External id": 470498,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267454802444.111, "dur": 19.614, + "args": { + "External id": 470499,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802553.622, "dur": 18.396, + "args": { + "External id": 470500,"Record function id": 0, "Ev Idx": 8675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802557.544, "dur": 13.205, + "args": { + "External id": 470501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802562.419, "dur": 7.147, + "args": { + "External id": 470502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802564.856, "dur": 4.549, + "args": { + "External id": 470503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802576.852, "dur": 6.122, + "args": { + "External id": 470504,"Record function id": 0, "Ev Idx": 8679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802578.311, "dur": 4.181, + "args": { + "External id": 470505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802579.199, "dur": 2.761, + "args": { + "External id": 470506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802580.488, "dur": 1.356, + "args": { + "External id": 470507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802586.811, "dur": 23.786, + "args": { + "External id": 470508,"Record function id": 0, "Ev Idx": 8683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802588.124, "dur": 20.775, + "args": { + "External id": 470509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802588.929, "dur": 2.016, + "args": { + "External id": 470510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802589.454, "dur": 1.400, + "args": { + "External id": 470511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802617.474, "dur": 6.974, + "args": { + "External id": 470512,"Record function id": 0, "Ev Idx": 8687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802619.172, "dur": 4.800, + "args": { + "External id": 470513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802620.618, "dur": 2.460, + "args": { + "External id": 470514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802621.552, "dur": 1.444, + "args": { + "External id": 470515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802628.320, "dur": 20.873, + "args": { + "External id": 470516,"Record function id": 0, "Ev Idx": 8691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802643.389, "dur": 5.289, + "args": { + "External id": 470517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802644.092, "dur": 4.018, + "args": { + "External id": 470518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802644.579, "dur": 3.456, + "args": { + "External id": 470519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802653.232, "dur": 7.320, + "args": { + "External id": 470520,"Record function id": 0, "Ev Idx": 8695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802654.758, "dur": 5.327, + "args": { + "External id": 470521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802655.531, "dur": 4.005, + "args": { + "External id": 470522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802658.222, "dur": 1.183, + "args": { + "External id": 470523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802664.560, "dur": 5.305, + "args": { + "External id": 470524,"Record function id": 0, "Ev Idx": 8699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802666.291, "dur": 3.117, + "args": { + "External id": 470525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802667.087, "dur": 1.794, + "args": { + "External id": 470526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802667.969, "dur": 0.823, + "args": { + "External id": 470527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802673.524, "dur": 51.872, + "args": { + "External id": 470528,"Record function id": 0, "Ev Idx": 8703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802721.445, "dur": 3.224, + "args": { + "External id": 470529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802722.745, "dur": 1.294, + "args": { + "External id": 470530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802723.138, "dur": 0.791, + "args": { + "External id": 470531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802729.811, "dur": 6.759, + "args": { + "External id": 470532,"Record function id": 0, "Ev Idx": 8707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454802731.125, "dur": 4.883, + "args": { + "External id": 470533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802731.868, "dur": 3.574, + "args": { + "External id": 470534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454802734.521, "dur": 0.788, + "args": { + "External id": 470535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454802741.463, "dur": 64283.824, + "args": { + "External id": 470536,"Record function id": 0, "Sequence number": 5036632, "Fwd thread id": 1, "Ev Idx": 8711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454802743.424, "dur": 64269.831, + "args": { + "External id": 470537,"Sequence number": 5036632, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8712 + } + }, + { + "ph": "f", "id": 411, "pid": 2338707, "tid": 2379456, "ts": 6267454802743.424, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2338707, "tid": 2379456, + "ts": 6267454802779.970, "dur": 52.465, + "args": { + "External id": 470538,"Record function id": 0, "Ev Idx": 8713 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2338707, "tid": 2379456, + "ts": 6267454802841.786, "dur": 76.554, + "args": { + "External id": 470539,"Record function id": 0, "Ev Idx": 8714 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2338707, "tid": 2379456, + "ts": 6267454802925.769, "dur": 64077.906, + "args": { + "External id": 470540,"Record function id": 0, "Ev Idx": 8715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454803034.179, "dur": 8.487, + "args": { + "External id": 470541,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454803054.273, "dur": 46.821, + "args": { + "External id": 470542,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454803126.036, "dur": 62675.671, + "args": { + "External id": 470543,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454803143.649, "dur": 62640.630, + "args": { + "External id": 470544,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454803248.599, "dur": 22.528, + "args": { + "External id": 470545,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454803294.937, "dur": 62435.275, + "args": { + "External id": 470546,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454803298.780, "dur": 62430.132, + "args": { + "External id": 470547,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454803318.059, "dur": 16.204, + "args": { + "External id": 470548,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454803337.220, "dur": 62385.934, + "args": { + "External id": 470549,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454865932.024, "dur": 14.811, + "args": { + "External id": 470550,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454865936.834, "dur": 9.392, + "args": { + "External id": 470551,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454865985.456, "dur": 546.331, + "args": { + "External id": 470552,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454866027.656, "dur": 497.012, + "args": { + "External id": 470553,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8728, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454866042.271, "dur": 475.630, + "args": { + "External id": 470554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454866559.527, "dur": 3.289, + "args": { + "External id": 470555,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8730, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454866657.226, "dur": 9.805, + "args": { + "External id": 470556,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454866681.628, "dur": 41.451, + "args": { + "External id": 470557,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454866735.227, "dur": 2.061, + "args": { + "External id": 470558,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454866743.539, "dur": 13.936, + "args": { + "External id": 470559,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454866764.029, "dur": 1.231, + "args": { + "External id": 470560,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454866769.913, "dur": 11.870, + "args": { + "External id": 470561,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454866787.863, "dur": 0.909, + "args": { + "External id": 470562,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454866793.751, "dur": 12.586, + "args": { + "External id": 470563,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454866814.633, "dur": 1.012, + "args": { + "External id": 470564,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454866819.823, "dur": 12.688, + "args": { + "External id": 470565,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454866837.264, "dur": 1.244, + "args": { + "External id": 470566,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454866842.882, "dur": 13.563, + "args": { + "External id": 470567,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454866861.508, "dur": 0.963, + "args": { + "External id": 470568,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454866872.493, "dur": 13.002, + "args": { + "External id": 470569,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454866890.186, "dur": 1.005, + "args": { + "External id": 470570,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454866895.622, "dur": 13.785, + "args": { + "External id": 470571,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454866913.736, "dur": 3.632, + "args": { + "External id": 470572,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454866921.911, "dur": 11.918, + "args": { + "External id": 470573,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454867043.733, "dur": 3472.066, + "args": { + "External id": 470574,"Record function id": 0, "Ev Idx": 8749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2338707, "tid": 2379456, + "ts": 6267454867111.934, "dur": 1239.599, + "args": { + "External id": 470575,"Record function id": 0, "Ev Idx": 8750 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338707, "tid": 2379456, + "ts": 6267454867134.077, "dur": 385.648, + "args": { + "External id": 470576,"Record function id": 0, "Ev Idx": 8751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454867229.579, "dur": 6.193, + "args": { + "External id": 470577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454867240.494, "dur": 0.911, + "args": { + "External id": 470578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454867243.906, "dur": 1.230, + "args": { + "External id": 470579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454867247.004, "dur": 1.336, + "args": { + "External id": 470580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454867257.602, "dur": 1.142, + "args": { + "External id": 470581,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454867260.223, "dur": 0.821, + "args": { + "External id": 470582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454867262.829, "dur": 0.919, + "args": { + "External id": 470583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454867265.290, "dur": 3.992, + "args": { + "External id": 470584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454867273.123, "dur": 0.979, + "args": { + "External id": 470585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454867275.794, "dur": 0.835, + "args": { + "External id": 470586,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454867297.369, "dur": 186.066, + "args": { + "External id": 470587,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454867315.297, "dur": 162.651, + "args": { + "External id": 470588,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454867344.457, "dur": 17.631, + "args": { + "External id": 470589,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454867366.341, "dur": 80.242, + "args": { + "External id": 470590,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454867369.583, "dur": 76.603, + "args": { + "External id": 470591,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867374.349, "dur": 6.317, + "args": { + "External id": 470592,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454867385.213, "dur": 60.133, + "args": { + "External id": 470593,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8768 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2338707, "tid": 2379456, + "ts": 6267454867630.981, "dur": 712.038, + "args": { + "External id": 470594,"Record function id": 0, "Ev Idx": 8769 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338707, "tid": 2379456, + "ts": 6267454867652.280, "dur": 675.019, + "args": { + "External id": 470595,"Record function id": 0, "Ev Idx": 8770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454867720.777, "dur": 7.954, + "args": { + "External id": 470596,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454867748.840, "dur": 34.026, + "args": { + "External id": 470597,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867755.044, "dur": 1.964, + "args": { + "External id": 470598,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867759.425, "dur": 0.569, + "args": { + "External id": 470599,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867761.961, "dur": 2.790, + "args": { + "External id": 470600,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867766.426, "dur": 0.623, + "args": { + "External id": 470601,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867768.252, "dur": 0.839, + "args": { + "External id": 470602,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867771.355, "dur": 0.468, + "args": { + "External id": 470603,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867773.752, "dur": 0.649, + "args": { + "External id": 470604,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867775.846, "dur": 0.478, + "args": { + "External id": 470605,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867778.210, "dur": 0.583, + "args": { + "External id": 470606,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454867794.526, "dur": 50.907, + "args": { + "External id": 470607,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267454867880.174, "dur": 121.799, + "args": { + "External id": 470608,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454867892.217, "dur": 3.706, + "args": { + "External id": 470609,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267454867901.898, "dur": 15.181, + "args": { + "External id": 470610,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454867907.450, "dur": 9.138, + "args": { + "External id": 470611,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867911.910, "dur": 2.949, + "args": { + "External id": 470612,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454867925.178, "dur": 24.092, + "args": { + "External id": 470613,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867927.145, "dur": 0.414, + "args": { + "External id": 470614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867929.082, "dur": 0.486, + "args": { + "External id": 470615,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867931.545, "dur": 0.445, + "args": { + "External id": 470616,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867933.692, "dur": 0.281, + "args": { + "External id": 470617,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867935.082, "dur": 0.458, + "args": { + "External id": 470618,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867937.379, "dur": 0.596, + "args": { + "External id": 470619,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867939.644, "dur": 0.291, + "args": { + "External id": 470620,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867940.980, "dur": 2.891, + "args": { + "External id": 470621,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454867945.653, "dur": 0.656, + "args": { + "External id": 470622,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454867960.372, "dur": 33.344, + "args": { + "External id": 470623,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454868053.125, "dur": 183.262, + "args": { + "External id": 470624,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454868126.849, "dur": 105.180, + "args": { + "External id": 470625,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8800, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454868139.397, "dur": 87.842, + "args": { + "External id": 470626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454868258.407, "dur": 2.157, + "args": { + "External id": 470627,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8802, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454868360.613, "dur": 2127.239, + "args": { + "External id": 470628,"Sequence number": 5036631, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8803 + } + }, + { + "ph": "f", "id": 412, "pid": 2338707, "tid": 2379456, "ts": 6267454868360.613, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454868493.439, "dur": 154.545, + "args": { + "External id": 470629,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454868705.326, "dur": 49.477, + "args": { + "External id": 470630,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454868778.309, "dur": 71.176, + "args": { + "External id": 470631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454868860.819, "dur": 36.587, + "args": { + "External id": 470632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454868904.904, "dur": 36.617, + "args": { + "External id": 470633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454868948.892, "dur": 31.802, + "args": { + "External id": 470634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454868990.285, "dur": 32.876, + "args": { + "External id": 470635,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454869052.593, "dur": 73.702, + "args": { + "External id": 470636,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454869152.870, "dur": 33.224, + "args": { + "External id": 470637,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454869212.701, "dur": 23.058, + "args": { + "External id": 470638,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454869251.715, "dur": 18.900, + "args": { + "External id": 470639,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454869283.735, "dur": 52.189, + "args": { + "External id": 470640,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454869340.897, "dur": 40.946, + "args": { + "External id": 470641,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454869415.924, "dur": 347.420, + "args": { + "External id": 470642,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454869530.298, "dur": 9.423, + "args": { + "External id": 470643,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454869543.116, "dur": 3.560, + "args": { + "External id": 470644,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454869548.129, "dur": 4.474, + "args": { + "External id": 470645,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454869554.057, "dur": 2.463, + "args": { + "External id": 470646,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454869637.940, "dur": 6.759, + "args": { + "External id": 470647,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454869640.510, "dur": 3.704, + "args": { + "External id": 470648,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454869646.956, "dur": 38.930, + "args": { + "External id": 470649,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454869653.850, "dur": 2.133, + "args": { + "External id": 470650,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454869688.169, "dur": 2.307, + "args": { + "External id": 470651,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454869689.403, "dur": 0.980, + "args": { + "External id": 470652,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454869691.946, "dur": 18.249, + "args": { + "External id": 470653,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454869694.747, "dur": 0.983, + "args": { + "External id": 470654,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454869804.465, "dur": 34.595, + "args": { + "External id": 470655,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454869858.989, "dur": 20.457, + "args": { + "External id": 470656,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454869889.454, "dur": 60.745, + "args": { + "External id": 470657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454869959.290, "dur": 49.481, + "args": { + "External id": 470658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454870021.476, "dur": 26.755, + "args": { + "External id": 470659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454870093.374, "dur": 47.215, + "args": { + "External id": 470660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454870154.839, "dur": 35.414, + "args": { + "External id": 470661,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454870198.825, "dur": 37.097, + "args": { + "External id": 470662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267454870258.920, "dur": 30.912, + "args": { + "External id": 470663,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454870310.340, "dur": 29.774, + "args": { + "External id": 470664,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454870356.374, "dur": 25.011, + "args": { + "External id": 470665,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454870397.389, "dur": 19.206, + "args": { + "External id": 470666,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267454870430.230, "dur": 19.956, + "args": { + "External id": 470667,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870542.432, "dur": 32.672, + "args": { + "External id": 470668,"Record function id": 0, "Ev Idx": 8843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870546.373, "dur": 27.376, + "args": { + "External id": 470669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870564.482, "dur": 7.792, + "args": { + "External id": 470670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870567.090, "dur": 5.005, + "args": { + "External id": 470671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870580.003, "dur": 6.196, + "args": { + "External id": 470672,"Record function id": 0, "Ev Idx": 8847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870581.792, "dur": 3.887, + "args": { + "External id": 470673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870582.811, "dur": 2.301, + "args": { + "External id": 470674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870583.998, "dur": 1.035, + "args": { + "External id": 470675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870590.110, "dur": 25.143, + "args": { + "External id": 470676,"Record function id": 0, "Ev Idx": 8851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870607.876, "dur": 6.220, + "args": { + "External id": 470677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870609.912, "dur": 3.176, + "args": { + "External id": 470678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870610.719, "dur": 2.083, + "args": { + "External id": 470679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870621.104, "dur": 5.215, + "args": { + "External id": 470680,"Record function id": 0, "Ev Idx": 8855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870622.778, "dur": 3.067, + "args": { + "External id": 470681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870623.604, "dur": 1.710, + "args": { + "External id": 470682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870624.219, "dur": 1.018, + "args": { + "External id": 470683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870629.999, "dur": 7.540, + "args": { + "External id": 470684,"Record function id": 0, "Ev Idx": 8859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870631.589, "dur": 5.452, + "args": { + "External id": 470685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870632.358, "dur": 4.071, + "args": { + "External id": 470686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870632.925, "dur": 3.432, + "args": { + "External id": 470687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870641.433, "dur": 5.197, + "args": { + "External id": 470688,"Record function id": 0, "Ev Idx": 8863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870643.077, "dur": 3.055, + "args": { + "External id": 470689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870643.870, "dur": 1.591, + "args": { + "External id": 470690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870644.440, "dur": 0.929, + "args": { + "External id": 470691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870650.399, "dur": 4.455, + "args": { + "External id": 470692,"Record function id": 0, "Ev Idx": 8867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870651.596, "dur": 2.763, + "args": { + "External id": 470693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870652.360, "dur": 1.434, + "args": { + "External id": 470694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870652.863, "dur": 0.841, + "args": { + "External id": 470695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870658.499, "dur": 7.001, + "args": { + "External id": 470696,"Record function id": 0, "Ev Idx": 8871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870659.838, "dur": 5.181, + "args": { + "External id": 470697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870660.495, "dur": 3.941, + "args": { + "External id": 470698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870663.443, "dur": 0.878, + "args": { + "External id": 470699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870669.863, "dur": 4.683, + "args": { + "External id": 470700,"Record function id": 0, "Ev Idx": 8875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454870671.333, "dur": 2.716, + "args": { + "External id": 470701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870671.922, "dur": 1.652, + "args": { + "External id": 470702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454870672.735, "dur": 0.764, + "args": { + "External id": 470703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454870679.706, "dur": 61570.599, + "args": { + "External id": 470704,"Record function id": 0, "Sequence number": 5036630, "Fwd thread id": 1, "Ev Idx": 8879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454870681.439, "dur": 61557.890, + "args": { + "External id": 470705,"Sequence number": 5036630, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8880 + } + }, + { + "ph": "f", "id": 413, "pid": 2338707, "tid": 2379456, "ts": 6267454870681.439, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2338707, "tid": 2379456, + "ts": 6267454870720.460, "dur": 47.625, + "args": { + "External id": 470706,"Record function id": 0, "Ev Idx": 8881 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2338707, "tid": 2379456, + "ts": 6267454870776.586, "dur": 71.406, + "args": { + "External id": 470707,"Record function id": 0, "Ev Idx": 8882 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2338707, "tid": 2379456, + "ts": 6267454870854.521, "dur": 61375.051, + "args": { + "External id": 470708,"Record function id": 0, "Ev Idx": 8883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454870963.131, "dur": 8.659, + "args": { + "External id": 470709,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454870983.650, "dur": 5.400, + "args": { + "External id": 470710,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454871007.831, "dur": 60034.123, + "args": { + "External id": 470711,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454871023.740, "dur": 60001.525, + "args": { + "External id": 470712,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454871170.321, "dur": 32.651, + "args": { + "External id": 470713,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454871232.909, "dur": 59734.102, + "args": { + "External id": 470714,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454871236.334, "dur": 59728.796, + "args": { + "External id": 470715,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454871251.264, "dur": 15.896, + "args": { + "External id": 470716,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454871269.792, "dur": 59690.991, + "args": { + "External id": 470717,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454931218.087, "dur": 14.700, + "args": { + "External id": 470718,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454931222.426, "dur": 9.673, + "args": { + "External id": 470719,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454931271.817, "dur": 454.533, + "args": { + "External id": 470720,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454931309.486, "dur": 410.335, + "args": { + "External id": 470721,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8896, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454931324.444, "dur": 387.561, + "args": { + "External id": 470722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454931753.534, "dur": 2.713, + "args": { + "External id": 470723,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8898, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454931829.410, "dur": 9.777, + "args": { + "External id": 470724,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454931854.437, "dur": 43.680, + "args": { + "External id": 470725,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454931911.123, "dur": 2.107, + "args": { + "External id": 470726,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454931918.827, "dur": 17.086, + "args": { + "External id": 470727,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454931942.194, "dur": 0.978, + "args": { + "External id": 470728,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454931948.515, "dur": 16.055, + "args": { + "External id": 470729,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454931970.195, "dur": 1.023, + "args": { + "External id": 470730,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454931975.119, "dur": 12.743, + "args": { + "External id": 470731,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454931992.989, "dur": 1.104, + "args": { + "External id": 470732,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454931998.696, "dur": 11.035, + "args": { + "External id": 470733,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932014.688, "dur": 1.360, + "args": { + "External id": 470734,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454932020.319, "dur": 12.169, + "args": { + "External id": 470735,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932037.580, "dur": 1.236, + "args": { + "External id": 470736,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454932046.231, "dur": 51.434, + "args": { + "External id": 470737,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932107.782, "dur": 1.800, + "args": { + "External id": 470738,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454932115.199, "dur": 15.518, + "args": { + "External id": 470739,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932136.028, "dur": 3.336, + "args": { + "External id": 470740,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454932143.538, "dur": 13.297, + "args": { + "External id": 470741,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454932269.128, "dur": 3439.078, + "args": { + "External id": 470742,"Record function id": 0, "Ev Idx": 8917 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2338707, "tid": 2379456, + "ts": 6267454932291.438, "dur": 1233.351, + "args": { + "External id": 470743,"Record function id": 0, "Ev Idx": 8918 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338707, "tid": 2379456, + "ts": 6267454932309.184, "dur": 393.521, + "args": { + "External id": 470744,"Record function id": 0, "Ev Idx": 8919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454932396.945, "dur": 4.957, + "args": { + "External id": 470745,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454932405.462, "dur": 1.006, + "args": { + "External id": 470746,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454932408.587, "dur": 0.908, + "args": { + "External id": 470747,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454932411.501, "dur": 1.231, + "args": { + "External id": 470748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454932417.075, "dur": 0.760, + "args": { + "External id": 470749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454932419.769, "dur": 0.746, + "args": { + "External id": 470750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454932422.451, "dur": 0.935, + "args": { + "External id": 470751,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454932425.051, "dur": 3.943, + "args": { + "External id": 470752,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454932433.387, "dur": 0.663, + "args": { + "External id": 470753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454932436.287, "dur": 0.822, + "args": { + "External id": 470754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454932457.276, "dur": 206.149, + "args": { + "External id": 470755,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454932475.999, "dur": 181.332, + "args": { + "External id": 470756,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454932506.529, "dur": 17.479, + "args": { + "External id": 470757,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454932528.602, "dur": 94.257, + "args": { + "External id": 470758,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454932531.808, "dur": 90.531, + "args": { + "External id": 470759,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932537.156, "dur": 6.220, + "args": { + "External id": 470760,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454932545.617, "dur": 75.646, + "args": { + "External id": 470761,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2338707, "tid": 2379456, + "ts": 6267454932796.633, "dur": 718.515, + "args": { + "External id": 470762,"Record function id": 0, "Ev Idx": 8937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338707, "tid": 2379456, + "ts": 6267454932818.203, "dur": 682.180, + "args": { + "External id": 470763,"Record function id": 0, "Ev Idx": 8938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454932885.177, "dur": 6.791, + "args": { + "External id": 470764,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454932910.017, "dur": 34.100, + "args": { + "External id": 470765,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932917.132, "dur": 2.356, + "args": { + "External id": 470766,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932921.546, "dur": 0.726, + "args": { + "External id": 470767,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932924.044, "dur": 2.905, + "args": { + "External id": 470768,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932928.940, "dur": 0.533, + "args": { + "External id": 470769,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932930.664, "dur": 0.584, + "args": { + "External id": 470770,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932933.132, "dur": 0.446, + "args": { + "External id": 470771,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932935.048, "dur": 0.369, + "args": { + "External id": 470772,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932936.816, "dur": 0.472, + "args": { + "External id": 470773,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454932939.147, "dur": 0.548, + "args": { + "External id": 470774,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454932956.683, "dur": 46.540, + "args": { + "External id": 470775,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267454933041.166, "dur": 180.525, + "args": { + "External id": 470776,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454933053.419, "dur": 45.178, + "args": { + "External id": 470777,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267454933106.931, "dur": 16.438, + "args": { + "External id": 470778,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454933112.348, "dur": 10.512, + "args": { + "External id": 470779,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454933117.197, "dur": 3.287, + "args": { + "External id": 470780,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454933132.982, "dur": 26.182, + "args": { + "External id": 470781,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454933135.857, "dur": 0.515, + "args": { + "External id": 470782,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454933137.983, "dur": 0.670, + "args": { + "External id": 470783,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454933140.658, "dur": 0.496, + "args": { + "External id": 470784,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454933142.741, "dur": 0.410, + "args": { + "External id": 470785,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454933144.217, "dur": 0.835, + "args": { + "External id": 470786,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454933147.068, "dur": 0.555, + "args": { + "External id": 470787,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454933149.201, "dur": 0.345, + "args": { + "External id": 470788,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454933150.631, "dur": 2.727, + "args": { + "External id": 470789,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454933155.164, "dur": 0.660, + "args": { + "External id": 470790,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454933174.475, "dur": 38.397, + "args": { + "External id": 470791,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454933277.960, "dur": 140.612, + "args": { + "External id": 470792,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454933313.264, "dur": 100.914, + "args": { + "External id": 470793,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8968, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454933324.833, "dur": 84.492, + "args": { + "External id": 470794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454933438.898, "dur": 2.583, + "args": { + "External id": 470795,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8970, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454933532.792, "dur": 2150.139, + "args": { + "External id": 470796,"Sequence number": 5036629, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8971 + } + }, + { + "ph": "f", "id": 414, "pid": 2338707, "tid": 2379456, "ts": 6267454933532.792, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454933678.746, "dur": 132.245, + "args": { + "External id": 470797,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454933865.964, "dur": 43.550, + "args": { + "External id": 470798,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454933932.558, "dur": 60.436, + "args": { + "External id": 470799,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454934005.559, "dur": 38.170, + "args": { + "External id": 470800,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454934050.976, "dur": 85.939, + "args": { + "External id": 470801,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454934151.093, "dur": 35.020, + "args": { + "External id": 470802,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454934196.434, "dur": 34.788, + "args": { + "External id": 470803,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454934264.725, "dur": 30.343, + "args": { + "External id": 470804,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454934316.013, "dur": 33.431, + "args": { + "External id": 470805,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454934373.808, "dur": 24.192, + "args": { + "External id": 470806,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454934414.486, "dur": 18.673, + "args": { + "External id": 470807,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454934446.029, "dur": 47.342, + "args": { + "External id": 470808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454934497.818, "dur": 39.670, + "args": { + "External id": 470809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454934570.730, "dur": 349.189, + "args": { + "External id": 470810,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454934702.556, "dur": 11.348, + "args": { + "External id": 470811,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454934716.884, "dur": 4.265, + "args": { + "External id": 470812,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454934722.694, "dur": 4.706, + "args": { + "External id": 470813,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454934728.675, "dur": 2.651, + "args": { + "External id": 470814,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454934793.676, "dur": 5.816, + "args": { + "External id": 470815,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454934795.788, "dur": 3.473, + "args": { + "External id": 470816,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454934801.638, "dur": 37.516, + "args": { + "External id": 470817,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454934808.090, "dur": 1.803, + "args": { + "External id": 470818,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267454934840.869, "dur": 2.238, + "args": { + "External id": 470819,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454934842.155, "dur": 0.860, + "args": { + "External id": 470820,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267454934844.583, "dur": 20.795, + "args": { + "External id": 470821,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454934850.892, "dur": 0.757, + "args": { + "External id": 470822,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454934964.845, "dur": 32.986, + "args": { + "External id": 470823,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454935021.730, "dur": 19.606, + "args": { + "External id": 470824,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454935051.940, "dur": 101.418, + "args": { + "External id": 470825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454935166.439, "dur": 48.727, + "args": { + "External id": 470826,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454935229.849, "dur": 26.515, + "args": { + "External id": 470827,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454935264.231, "dur": 36.368, + "args": { + "External id": 470828,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454935309.840, "dur": 34.850, + "args": { + "External id": 470829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454935353.529, "dur": 36.331, + "args": { + "External id": 470830,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267454935412.590, "dur": 31.186, + "args": { + "External id": 470831,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454935465.016, "dur": 29.334, + "args": { + "External id": 470832,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454935524.583, "dur": 22.290, + "args": { + "External id": 470833,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454935564.087, "dur": 18.967, + "args": { + "External id": 470834,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267454935616.785, "dur": 23.614, + "args": { + "External id": 470835,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935734.682, "dur": 18.877, + "args": { + "External id": 470836,"Record function id": 0, "Ev Idx": 9011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935738.924, "dur": 13.353, + "args": { + "External id": 470837,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935743.969, "dur": 7.080, + "args": { + "External id": 470838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935746.179, "dur": 4.714, + "args": { + "External id": 470839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935758.432, "dur": 6.077, + "args": { + "External id": 470840,"Record function id": 0, "Ev Idx": 9015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935760.432, "dur": 3.527, + "args": { + "External id": 470841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935761.232, "dur": 2.133, + "args": { + "External id": 470842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935762.223, "dur": 1.063, + "args": { + "External id": 470843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935771.553, "dur": 4.669, + "args": { + "External id": 470844,"Record function id": 0, "Ev Idx": 9019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935772.742, "dur": 2.991, + "args": { + "External id": 470845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935773.321, "dur": 1.891, + "args": { + "External id": 470846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935773.718, "dur": 1.401, + "args": { + "External id": 470847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935780.045, "dur": 4.660, + "args": { + "External id": 470848,"Record function id": 0, "Ev Idx": 9023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935781.396, "dur": 2.830, + "args": { + "External id": 470849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935782.107, "dur": 1.645, + "args": { + "External id": 470850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935782.904, "dur": 0.749, + "args": { + "External id": 470851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935788.336, "dur": 6.662, + "args": { + "External id": 470852,"Record function id": 0, "Ev Idx": 9027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935789.571, "dur": 4.975, + "args": { + "External id": 470853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935790.116, "dur": 3.742, + "args": { + "External id": 470854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935790.541, "dur": 3.241, + "args": { + "External id": 470855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935798.808, "dur": 4.769, + "args": { + "External id": 470856,"Record function id": 0, "Ev Idx": 9031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935800.265, "dur": 2.833, + "args": { + "External id": 470857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935800.873, "dur": 1.431, + "args": { + "External id": 470858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935801.369, "dur": 0.821, + "args": { + "External id": 470859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935807.418, "dur": 4.158, + "args": { + "External id": 470860,"Record function id": 0, "Ev Idx": 9035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935808.894, "dur": 2.220, + "args": { + "External id": 470861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935809.424, "dur": 1.174, + "args": { + "External id": 470862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935809.769, "dur": 0.754, + "args": { + "External id": 470863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935815.271, "dur": 4.553, + "args": { + "External id": 470864,"Record function id": 0, "Ev Idx": 9039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935816.649, "dur": 2.703, + "args": { + "External id": 470865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935817.218, "dur": 1.587, + "args": { + "External id": 470866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935817.713, "dur": 0.979, + "args": { + "External id": 470867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935827.357, "dur": 4.608, + "args": { + "External id": 470868,"Record function id": 0, "Ev Idx": 9043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267454935828.685, "dur": 2.820, + "args": { + "External id": 470869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935829.386, "dur": 1.552, + "args": { + "External id": 470870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267454935829.981, "dur": 0.826, + "args": { + "External id": 470871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454935837.182, "dur": 61622.499, + "args": { + "External id": 470872,"Record function id": 0, "Sequence number": 5036628, "Fwd thread id": 1, "Ev Idx": 9047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454935839.003, "dur": 61609.141, + "args": { + "External id": 470873,"Sequence number": 5036628, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9048 + } + }, + { + "ph": "f", "id": 415, "pid": 2338707, "tid": 2379456, "ts": 6267454935839.003, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2338707, "tid": 2379456, + "ts": 6267454935875.513, "dur": 46.060, + "args": { + "External id": 470874,"Record function id": 0, "Ev Idx": 9049 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2338707, "tid": 2379456, + "ts": 6267454935931.307, "dur": 70.605, + "args": { + "External id": 470875,"Record function id": 0, "Ev Idx": 9050 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2338707, "tid": 2379456, + "ts": 6267454936008.709, "dur": 61430.199, + "args": { + "External id": 470876,"Record function id": 0, "Ev Idx": 9051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454936160.497, "dur": 10.034, + "args": { + "External id": 470877,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454936183.324, "dur": 6.175, + "args": { + "External id": 470878,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454936208.893, "dur": 60068.050, + "args": { + "External id": 470879,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267454936225.346, "dur": 60035.079, + "args": { + "External id": 470880,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454936328.383, "dur": 21.895, + "args": { + "External id": 470881,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454936386.413, "dur": 59817.616, + "args": { + "External id": 470882,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454936390.336, "dur": 59811.536, + "args": { + "External id": 470883,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454936397.683, "dur": 14.909, + "args": { + "External id": 470884,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454936417.887, "dur": 59782.108, + "args": { + "External id": 470885,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454996415.654, "dur": 15.931, + "args": { + "External id": 470886,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454996420.736, "dur": 10.277, + "args": { + "External id": 470887,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454996474.135, "dur": 474.965, + "args": { + "External id": 470888,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454996518.268, "dur": 423.815, + "args": { + "External id": 470889,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9064, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454996533.547, "dur": 401.779, + "args": { + "External id": 470890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454996976.391, "dur": 3.052, + "args": { + "External id": 470891,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9066, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454997092.513, "dur": 10.209, + "args": { + "External id": 470892,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454997118.485, "dur": 46.176, + "args": { + "External id": 470893,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454997178.340, "dur": 1.922, + "args": { + "External id": 470894,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454997186.772, "dur": 15.256, + "args": { + "External id": 470895,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454997209.464, "dur": 1.127, + "args": { + "External id": 470896,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454997215.876, "dur": 12.217, + "args": { + "External id": 470897,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454997233.977, "dur": 0.955, + "args": { + "External id": 470898,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454997239.347, "dur": 12.133, + "args": { + "External id": 470899,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454997255.760, "dur": 1.108, + "args": { + "External id": 470900,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454997261.824, "dur": 11.946, + "args": { + "External id": 470901,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454997278.175, "dur": 1.443, + "args": { + "External id": 470902,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454997284.333, "dur": 11.977, + "args": { + "External id": 470903,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454997300.896, "dur": 1.105, + "args": { + "External id": 470904,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454997306.637, "dur": 11.462, + "args": { + "External id": 470905,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454997322.876, "dur": 1.336, + "args": { + "External id": 470906,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454997328.774, "dur": 11.861, + "args": { + "External id": 470907,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454997344.996, "dur": 3.553, + "args": { + "External id": 470908,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267454997353.047, "dur": 10.930, + "args": { + "External id": 470909,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454997478.347, "dur": 3525.917, + "args": { + "External id": 470910,"Record function id": 0, "Ev Idx": 9085 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2338707, "tid": 2379456, + "ts": 6267454997503.902, "dur": 1305.250, + "args": { + "External id": 470911,"Record function id": 0, "Ev Idx": 9086 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338707, "tid": 2379456, + "ts": 6267454997523.121, "dur": 414.287, + "args": { + "External id": 470912,"Record function id": 0, "Ev Idx": 9087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454997637.031, "dur": 5.711, + "args": { + "External id": 470913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454997647.553, "dur": 1.300, + "args": { + "External id": 470914,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454997651.596, "dur": 1.276, + "args": { + "External id": 470915,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454997654.588, "dur": 1.384, + "args": { + "External id": 470916,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454997658.385, "dur": 1.103, + "args": { + "External id": 470917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454997661.018, "dur": 0.818, + "args": { + "External id": 470918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454997663.462, "dur": 0.966, + "args": { + "External id": 470919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454997666.170, "dur": 4.527, + "args": { + "External id": 470920,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454997674.739, "dur": 1.001, + "args": { + "External id": 470921,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267454997677.336, "dur": 0.694, + "args": { + "External id": 470922,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454997700.054, "dur": 199.367, + "args": { + "External id": 470923,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454997719.579, "dur": 173.741, + "args": { + "External id": 470924,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454997751.028, "dur": 19.008, + "args": { + "External id": 470925,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267454997774.434, "dur": 85.797, + "args": { + "External id": 470926,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267454997777.556, "dur": 82.264, + "args": { + "External id": 470927,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454997786.235, "dur": 6.816, + "args": { + "External id": 470928,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454997795.352, "dur": 63.780, + "args": { + "External id": 470929,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2338707, "tid": 2379456, + "ts": 6267454998031.154, "dur": 768.681, + "args": { + "External id": 470930,"Record function id": 0, "Ev Idx": 9105 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338707, "tid": 2379456, + "ts": 6267454998052.818, "dur": 731.675, + "args": { + "External id": 470931,"Record function id": 0, "Ev Idx": 9106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454998167.555, "dur": 8.248, + "args": { + "External id": 470932,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454998195.012, "dur": 35.446, + "args": { + "External id": 470933,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998201.217, "dur": 2.340, + "args": { + "External id": 470934,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998206.370, "dur": 0.943, + "args": { + "External id": 470935,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998208.551, "dur": 3.178, + "args": { + "External id": 470936,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998213.518, "dur": 0.704, + "args": { + "External id": 470937,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998216.098, "dur": 0.391, + "args": { + "External id": 470938,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998217.732, "dur": 0.597, + "args": { + "External id": 470939,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998220.391, "dur": 0.652, + "args": { + "External id": 470940,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998222.692, "dur": 1.027, + "args": { + "External id": 470941,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998225.033, "dur": 0.590, + "args": { + "External id": 470942,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454998242.380, "dur": 49.027, + "args": { + "External id": 470943,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267454998328.324, "dur": 145.743, + "args": { + "External id": 470944,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454998341.447, "dur": 3.933, + "args": { + "External id": 470945,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267454998351.246, "dur": 14.264, + "args": { + "External id": 470946,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267454998356.333, "dur": 8.689, + "args": { + "External id": 470947,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998360.164, "dur": 3.333, + "args": { + "External id": 470948,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267454998373.943, "dur": 47.662, + "args": { + "External id": 470949,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998393.724, "dur": 0.752, + "args": { + "External id": 470950,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998396.617, "dur": 0.468, + "args": { + "External id": 470951,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998398.962, "dur": 0.462, + "args": { + "External id": 470952,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998400.611, "dur": 0.670, + "args": { + "External id": 470953,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998403.350, "dur": 0.527, + "args": { + "External id": 470954,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998405.298, "dur": 0.734, + "args": { + "External id": 470955,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998407.348, "dur": 0.638, + "args": { + "External id": 470956,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998409.870, "dur": 2.705, + "args": { + "External id": 470957,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267454998414.559, "dur": 0.490, + "args": { + "External id": 470958,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267454998432.853, "dur": 32.503, + "args": { + "External id": 470959,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267454998530.677, "dur": 159.844, + "args": { + "External id": 470960,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454998562.882, "dur": 122.823, + "args": { + "External id": 470961,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9136, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267454998574.878, "dur": 105.367, + "args": { + "External id": 470962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267454998712.959, "dur": 2.380, + "args": { + "External id": 470963,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9138, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267454998818.159, "dur": 2161.160, + "args": { + "External id": 470964,"Sequence number": 5036627, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9139 + } + }, + { + "ph": "f", "id": 416, "pid": 2338707, "tid": 2379456, "ts": 6267454998818.159, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454998948.845, "dur": 175.227, + "args": { + "External id": 470965,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454999180.313, "dur": 48.041, + "args": { + "External id": 470966,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267454999252.178, "dur": 68.741, + "args": { + "External id": 470967,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454999332.302, "dur": 38.066, + "args": { + "External id": 470968,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454999377.730, "dur": 37.765, + "args": { + "External id": 470969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454999422.980, "dur": 32.007, + "args": { + "External id": 470970,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454999464.915, "dur": 33.554, + "args": { + "External id": 470971,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267454999529.016, "dur": 28.904, + "args": { + "External id": 470972,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267454999583.459, "dur": 51.938, + "args": { + "External id": 470973,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267454999664.163, "dur": 25.619, + "args": { + "External id": 470974,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267454999708.359, "dur": 17.835, + "args": { + "External id": 470975,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454999740.576, "dur": 49.876, + "args": { + "External id": 470976,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267454999794.735, "dur": 39.478, + "args": { + "External id": 470977,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267454999867.888, "dur": 376.823, + "args": { + "External id": 470978,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454999962.639, "dur": 8.198, + "args": { + "External id": 470979,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454999973.711, "dur": 3.291, + "args": { + "External id": 470980,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454999978.989, "dur": 5.122, + "args": { + "External id": 470981,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267454999985.451, "dur": 2.418, + "args": { + "External id": 470982,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455000099.957, "dur": 7.920, + "args": { + "External id": 470983,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455000102.543, "dur": 4.328, + "args": { + "External id": 470984,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455000110.285, "dur": 41.765, + "args": { + "External id": 470985,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455000117.772, "dur": 2.461, + "args": { + "External id": 470986,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455000153.961, "dur": 2.246, + "args": { + "External id": 470987,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455000155.362, "dur": 0.726, + "args": { + "External id": 470988,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455000157.569, "dur": 18.904, + "args": { + "External id": 470989,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455000160.235, "dur": 0.888, + "args": { + "External id": 470990,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455000293.968, "dur": 36.727, + "args": { + "External id": 470991,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455000354.405, "dur": 22.066, + "args": { + "External id": 470992,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455000387.162, "dur": 69.381, + "args": { + "External id": 470993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455000465.396, "dur": 50.493, + "args": { + "External id": 470994,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455000528.565, "dur": 26.012, + "args": { + "External id": 470995,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455000562.417, "dur": 55.927, + "args": { + "External id": 470996,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455000632.589, "dur": 38.042, + "args": { + "External id": 470997,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455000689.647, "dur": 38.287, + "args": { + "External id": 470998,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455000754.297, "dur": 30.155, + "args": { + "External id": 470999,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455000804.923, "dur": 26.873, + "args": { + "External id": 471000,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455000849.137, "dur": 20.752, + "args": { + "External id": 471001,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455000887.564, "dur": 17.238, + "args": { + "External id": 471002,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455000922.718, "dur": 19.015, + "args": { + "External id": 471003,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001030.689, "dur": 18.900, + "args": { + "External id": 471004,"Record function id": 0, "Ev Idx": 9179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001034.563, "dur": 13.848, + "args": { + "External id": 471005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001039.650, "dur": 7.583, + "args": { + "External id": 471006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001042.151, "dur": 4.938, + "args": { + "External id": 471007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001054.251, "dur": 50.796, + "args": { + "External id": 471008,"Record function id": 0, "Ev Idx": 9183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001095.715, "dur": 7.937, + "args": { + "External id": 471009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001098.208, "dur": 4.257, + "args": { + "External id": 471010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001100.030, "dur": 2.088, + "args": { + "External id": 471011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001111.461, "dur": 6.279, + "args": { + "External id": 471012,"Record function id": 0, "Ev Idx": 9187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001113.694, "dur": 3.534, + "args": { + "External id": 471013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001114.520, "dur": 2.149, + "args": { + "External id": 471014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001115.225, "dur": 1.336, + "args": { + "External id": 471015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001121.652, "dur": 7.707, + "args": { + "External id": 471016,"Record function id": 0, "Ev Idx": 9191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001123.072, "dur": 5.815, + "args": { + "External id": 471017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001123.703, "dur": 4.655, + "args": { + "External id": 471018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001124.432, "dur": 3.860, + "args": { + "External id": 471019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001133.076, "dur": 5.520, + "args": { + "External id": 471020,"Record function id": 0, "Ev Idx": 9195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001134.627, "dur": 3.465, + "args": { + "External id": 471021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001135.533, "dur": 1.894, + "args": { + "External id": 471022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001136.372, "dur": 0.971, + "args": { + "External id": 471023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001142.358, "dur": 5.197, + "args": { + "External id": 471024,"Record function id": 0, "Ev Idx": 9199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001143.818, "dur": 3.134, + "args": { + "External id": 471025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001144.573, "dur": 1.700, + "args": { + "External id": 471026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001145.185, "dur": 0.972, + "args": { + "External id": 471027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001151.557, "dur": 4.445, + "args": { + "External id": 471028,"Record function id": 0, "Ev Idx": 9203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001152.861, "dur": 2.664, + "args": { + "External id": 471029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001153.688, "dur": 1.318, + "args": { + "External id": 471030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001154.284, "dur": 0.646, + "args": { + "External id": 471031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001160.001, "dur": 4.697, + "args": { + "External id": 471032,"Record function id": 0, "Ev Idx": 9207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001161.647, "dur": 2.511, + "args": { + "External id": 471033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001162.301, "dur": 1.342, + "args": { + "External id": 471034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001162.639, "dur": 0.908, + "args": { + "External id": 471035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001168.812, "dur": 4.821, + "args": { + "External id": 471036,"Record function id": 0, "Ev Idx": 9211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455001170.175, "dur": 2.948, + "args": { + "External id": 471037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001170.833, "dur": 1.810, + "args": { + "External id": 471038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455001171.712, "dur": 0.799, + "args": { + "External id": 471039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455001178.629, "dur": 63305.932, + "args": { + "External id": 471040,"Record function id": 0, "Sequence number": 5036626, "Fwd thread id": 1, "Ev Idx": 9215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455001229.116, "dur": 63245.161, + "args": { + "External id": 471041,"Sequence number": 5036626, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9216 + } + }, + { + "ph": "f", "id": 417, "pid": 2338707, "tid": 2379456, "ts": 6267455001229.116, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2338707, "tid": 2379456, + "ts": 6267455001274.337, "dur": 51.036, + "args": { + "External id": 471042,"Record function id": 0, "Ev Idx": 9217 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2338707, "tid": 2379456, + "ts": 6267455001335.423, "dur": 76.489, + "args": { + "External id": 471043,"Record function id": 0, "Ev Idx": 9218 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2338707, "tid": 2379456, + "ts": 6267455001419.492, "dur": 63045.965, + "args": { + "External id": 471044,"Record function id": 0, "Ev Idx": 9219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455001530.686, "dur": 8.734, + "args": { + "External id": 471045,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455001551.954, "dur": 5.752, + "args": { + "External id": 471046,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455001574.779, "dur": 61688.724, + "args": { + "External id": 471047,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455001611.621, "dur": 61632.351, + "args": { + "External id": 471048,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455001723.047, "dur": 21.656, + "args": { + "External id": 471049,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455001768.489, "dur": 61421.338, + "args": { + "External id": 471050,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455001774.547, "dur": 61414.169, + "args": { + "External id": 471051,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455001780.433, "dur": 14.676, + "args": { + "External id": 471052,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455001798.109, "dur": 61384.660, + "args": { + "External id": 471053,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455063388.866, "dur": 13.890, + "args": { + "External id": 471054,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455063393.327, "dur": 8.832, + "args": { + "External id": 471055,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455063441.076, "dur": 505.944, + "args": { + "External id": 471056,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455063478.552, "dur": 460.814, + "args": { + "External id": 471057,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9232, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455063492.744, "dur": 439.113, + "args": { + "External id": 471058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455063976.176, "dur": 2.640, + "args": { + "External id": 471059,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9234, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455064096.711, "dur": 8.557, + "args": { + "External id": 471060,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455064120.678, "dur": 49.033, + "args": { + "External id": 471061,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455064183.503, "dur": 1.734, + "args": { + "External id": 471062,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455064191.562, "dur": 18.829, + "args": { + "External id": 471063,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455064217.570, "dur": 1.326, + "args": { + "External id": 471064,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455064224.019, "dur": 12.462, + "args": { + "External id": 471065,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455064242.172, "dur": 1.064, + "args": { + "External id": 471066,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455064247.700, "dur": 12.942, + "args": { + "External id": 471067,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455064265.992, "dur": 1.122, + "args": { + "External id": 471068,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455064271.863, "dur": 12.035, + "args": { + "External id": 471069,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455064289.178, "dur": 1.232, + "args": { + "External id": 471070,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455064295.252, "dur": 15.757, + "args": { + "External id": 471071,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455064315.799, "dur": 3.746, + "args": { + "External id": 471072,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455064324.410, "dur": 13.055, + "args": { + "External id": 471073,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455064342.803, "dur": 1.374, + "args": { + "External id": 471074,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455064348.612, "dur": 14.493, + "args": { + "External id": 471075,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455064367.560, "dur": 1.321, + "args": { + "External id": 471076,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455064376.645, "dur": 12.360, + "args": { + "External id": 471077,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455064503.773, "dur": 3486.875, + "args": { + "External id": 471078,"Record function id": 0, "Ev Idx": 9253 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2338707, "tid": 2379456, + "ts": 6267455064527.756, "dur": 1287.482, + "args": { + "External id": 471079,"Record function id": 0, "Ev Idx": 9254 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338707, "tid": 2379456, + "ts": 6267455064547.753, "dur": 409.669, + "args": { + "External id": 471080,"Record function id": 0, "Ev Idx": 9255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455064658.816, "dur": 5.653, + "args": { + "External id": 471081,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455064669.421, "dur": 1.245, + "args": { + "External id": 471082,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455064673.557, "dur": 1.591, + "args": { + "External id": 471083,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455064677.208, "dur": 1.159, + "args": { + "External id": 471084,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455064681.063, "dur": 0.835, + "args": { + "External id": 471085,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455064683.597, "dur": 3.416, + "args": { + "External id": 471086,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455064689.011, "dur": 0.915, + "args": { + "External id": 471087,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455064693.460, "dur": 1.883, + "args": { + "External id": 471088,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455064697.254, "dur": 0.793, + "args": { + "External id": 471089,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455064699.601, "dur": 0.769, + "args": { + "External id": 471090,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455064722.689, "dur": 196.059, + "args": { + "External id": 471091,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455064742.912, "dur": 169.777, + "args": { + "External id": 471092,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455064774.702, "dur": 16.837, + "args": { + "External id": 471093,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455064796.097, "dur": 81.426, + "args": { + "External id": 471094,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455064799.694, "dur": 77.454, + "args": { + "External id": 471095,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455064804.797, "dur": 6.230, + "args": { + "External id": 471096,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455064813.427, "dur": 62.934, + "args": { + "External id": 471097,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9272 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2338707, "tid": 2379456, + "ts": 6267455065100.281, "dur": 705.111, + "args": { + "External id": 471098,"Record function id": 0, "Ev Idx": 9273 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338707, "tid": 2379456, + "ts": 6267455065125.765, "dur": 663.314, + "args": { + "External id": 471099,"Record function id": 0, "Ev Idx": 9274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455065198.552, "dur": 8.086, + "args": { + "External id": 471100,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455065225.133, "dur": 38.762, + "args": { + "External id": 471101,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065232.097, "dur": 4.264, + "args": { + "External id": 471102,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065238.361, "dur": 0.772, + "args": { + "External id": 471103,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065241.500, "dur": 0.464, + "args": { + "External id": 471104,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065243.972, "dur": 0.815, + "args": { + "External id": 471105,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065246.364, "dur": 0.798, + "args": { + "External id": 471106,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065249.259, "dur": 0.761, + "args": { + "External id": 471107,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065251.606, "dur": 0.470, + "args": { + "External id": 471108,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065253.709, "dur": 0.512, + "args": { + "External id": 471109,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065256.083, "dur": 2.985, + "args": { + "External id": 471110,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455065275.398, "dur": 52.283, + "args": { + "External id": 471111,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455065363.019, "dur": 126.547, + "args": { + "External id": 471112,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455065375.620, "dur": 3.466, + "args": { + "External id": 471113,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455065384.910, "dur": 12.920, + "args": { + "External id": 471114,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455065390.531, "dur": 6.814, + "args": { + "External id": 471115,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065394.802, "dur": 0.897, + "args": { + "External id": 471116,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455065405.643, "dur": 26.465, + "args": { + "External id": 471117,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065407.443, "dur": 0.703, + "args": { + "External id": 471118,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065409.735, "dur": 0.600, + "args": { + "External id": 471119,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065412.048, "dur": 0.472, + "args": { + "External id": 471120,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065414.235, "dur": 0.553, + "args": { + "External id": 471121,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065416.122, "dur": 0.384, + "args": { + "External id": 471122,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065418.809, "dur": 3.254, + "args": { + "External id": 471123,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065423.624, "dur": 0.594, + "args": { + "External id": 471124,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065425.902, "dur": 0.537, + "args": { + "External id": 471125,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455065428.625, "dur": 0.462, + "args": { + "External id": 471126,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455065445.218, "dur": 35.708, + "args": { + "External id": 471127,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455065542.947, "dur": 158.408, + "args": { + "External id": 471128,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455065572.570, "dur": 124.502, + "args": { + "External id": 471129,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9304, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455065585.043, "dur": 106.831, + "args": { + "External id": 471130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455065722.907, "dur": 2.706, + "args": { + "External id": 471131,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9306, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455065827.823, "dur": 2138.831, + "args": { + "External id": 471132,"Sequence number": 5036625, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9307 + } + }, + { + "ph": "f", "id": 418, "pid": 2338707, "tid": 2379456, "ts": 6267455065827.823, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455065960.492, "dur": 180.787, + "args": { + "External id": 471133,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455066198.784, "dur": 45.937, + "args": { + "External id": 471134,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455066266.382, "dur": 65.809, + "args": { + "External id": 471135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455066344.388, "dur": 39.108, + "args": { + "External id": 471136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455066390.371, "dur": 37.480, + "args": { + "External id": 471137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455066437.464, "dur": 31.989, + "args": { + "External id": 471138,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455066476.978, "dur": 33.373, + "args": { + "External id": 471139,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455066539.549, "dur": 27.475, + "args": { + "External id": 471140,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455066590.112, "dur": 48.934, + "args": { + "External id": 471141,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455066669.025, "dur": 23.250, + "args": { + "External id": 471142,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455066709.917, "dur": 17.418, + "args": { + "External id": 471143,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455066738.738, "dur": 53.233, + "args": { + "External id": 471144,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455066796.866, "dur": 39.286, + "args": { + "External id": 471145,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455066886.650, "dur": 355.103, + "args": { + "External id": 471146,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455066991.540, "dur": 11.058, + "args": { + "External id": 471147,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455067005.659, "dur": 3.379, + "args": { + "External id": 471148,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455067010.702, "dur": 2.843, + "args": { + "External id": 471149,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455067015.267, "dur": 3.009, + "args": { + "External id": 471150,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455067116.988, "dur": 6.872, + "args": { + "External id": 471151,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455067119.588, "dur": 3.857, + "args": { + "External id": 471152,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455067126.347, "dur": 38.667, + "args": { + "External id": 471153,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455067133.606, "dur": 2.100, + "args": { + "External id": 471154,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455067167.047, "dur": 2.467, + "args": { + "External id": 471155,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455067168.429, "dur": 0.994, + "args": { + "External id": 471156,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455067171.273, "dur": 16.919, + "args": { + "External id": 471157,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455067174.279, "dur": 0.808, + "args": { + "External id": 471158,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455067282.412, "dur": 34.768, + "args": { + "External id": 471159,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455067337.395, "dur": 19.534, + "args": { + "External id": 471160,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455067366.909, "dur": 59.649, + "args": { + "External id": 471161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455067437.753, "dur": 48.393, + "args": { + "External id": 471162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455067506.424, "dur": 29.003, + "args": { + "External id": 471163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455067542.966, "dur": 38.314, + "args": { + "External id": 471164,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455067590.685, "dur": 63.110, + "args": { + "External id": 471165,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455067666.461, "dur": 37.865, + "args": { + "External id": 471166,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455067733.288, "dur": 33.794, + "args": { + "External id": 471167,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455067787.368, "dur": 30.660, + "args": { + "External id": 471168,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455067836.086, "dur": 23.326, + "args": { + "External id": 471169,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455067877.416, "dur": 18.572, + "args": { + "External id": 471170,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455067910.705, "dur": 19.309, + "args": { + "External id": 471171,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068016.972, "dur": 19.289, + "args": { + "External id": 471172,"Record function id": 0, "Ev Idx": 9347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068021.206, "dur": 13.783, + "args": { + "External id": 471173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068026.269, "dur": 7.575, + "args": { + "External id": 471174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068028.862, "dur": 4.848, + "args": { + "External id": 471175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068041.265, "dur": 8.779, + "args": { + "External id": 471176,"Record function id": 0, "Ev Idx": 9351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068042.688, "dur": 6.846, + "args": { + "External id": 471177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068043.603, "dur": 5.321, + "args": { + "External id": 471178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068044.781, "dur": 4.038, + "args": { + "External id": 471179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068054.166, "dur": 65.312, + "args": { + "External id": 471180,"Record function id": 0, "Ev Idx": 9355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068111.101, "dur": 7.219, + "args": { + "External id": 471181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068113.356, "dur": 3.653, + "args": { + "External id": 471182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068114.423, "dur": 2.281, + "args": { + "External id": 471183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068125.893, "dur": 6.018, + "args": { + "External id": 471184,"Record function id": 0, "Ev Idx": 9359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068128.254, "dur": 3.189, + "args": { + "External id": 471185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068129.010, "dur": 1.910, + "args": { + "External id": 471186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068129.907, "dur": 0.940, + "args": { + "External id": 471187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068135.643, "dur": 5.271, + "args": { + "External id": 471188,"Record function id": 0, "Ev Idx": 9363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068137.408, "dur": 3.008, + "args": { + "External id": 471189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068138.186, "dur": 1.377, + "args": { + "External id": 471190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068138.713, "dur": 0.774, + "args": { + "External id": 471191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068145.193, "dur": 5.259, + "args": { + "External id": 471192,"Record function id": 0, "Ev Idx": 9367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068146.852, "dur": 2.908, + "args": { + "External id": 471193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068147.464, "dur": 1.797, + "args": { + "External id": 471194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068148.107, "dur": 1.053, + "args": { + "External id": 471195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068154.440, "dur": 4.472, + "args": { + "External id": 471196,"Record function id": 0, "Ev Idx": 9371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068155.890, "dur": 2.540, + "args": { + "External id": 471197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068156.488, "dur": 1.410, + "args": { + "External id": 471198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068157.006, "dur": 0.816, + "args": { + "External id": 471199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068162.616, "dur": 4.311, + "args": { + "External id": 471200,"Record function id": 0, "Ev Idx": 9375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068164.003, "dur": 2.438, + "args": { + "External id": 471201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068164.573, "dur": 1.283, + "args": { + "External id": 471202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068164.976, "dur": 0.778, + "args": { + "External id": 471203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068170.536, "dur": 7.271, + "args": { + "External id": 471204,"Record function id": 0, "Ev Idx": 9379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455068172.283, "dur": 5.053, + "args": { + "External id": 471205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068172.933, "dur": 3.915, + "args": { + "External id": 471206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455068173.611, "dur": 3.099, + "args": { + "External id": 471207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455068182.409, "dur": 61737.414, + "args": { + "External id": 471208,"Record function id": 0, "Sequence number": 5036624, "Fwd thread id": 1, "Ev Idx": 9383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455068183.915, "dur": 61723.159, + "args": { + "External id": 471209,"Sequence number": 5036624, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9384 + } + }, + { + "ph": "f", "id": 419, "pid": 2338707, "tid": 2379456, "ts": 6267455068183.915, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2338707, "tid": 2379456, + "ts": 6267455068220.302, "dur": 45.768, + "args": { + "External id": 471210,"Record function id": 0, "Ev Idx": 9385 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2338707, "tid": 2379456, + "ts": 6267455068278.987, "dur": 79.434, + "args": { + "External id": 471211,"Record function id": 0, "Ev Idx": 9386 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2338707, "tid": 2379456, + "ts": 6267455068365.050, "dur": 61532.201, + "args": { + "External id": 471212,"Record function id": 0, "Ev Idx": 9387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455068478.877, "dur": 8.990, + "args": { + "External id": 471213,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455068499.617, "dur": 5.687, + "args": { + "External id": 471214,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455068525.375, "dur": 60003.402, + "args": { + "External id": 471215,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455068541.932, "dur": 59970.010, + "args": { + "External id": 471216,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455068687.974, "dur": 30.193, + "args": { + "External id": 471217,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455068745.793, "dur": 59712.203, + "args": { + "External id": 471218,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455068749.248, "dur": 59707.876, + "args": { + "External id": 471219,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455068755.059, "dur": 18.590, + "args": { + "External id": 471220,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455068776.074, "dur": 59675.446, + "args": { + "External id": 471221,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455128686.782, "dur": 15.066, + "args": { + "External id": 471222,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455128691.928, "dur": 9.171, + "args": { + "External id": 471223,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455128743.881, "dur": 478.084, + "args": { + "External id": 471224,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455128791.045, "dur": 423.754, + "args": { + "External id": 471225,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9400, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455128810.728, "dur": 396.653, + "args": { + "External id": 471226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455129249.993, "dur": 2.818, + "args": { + "External id": 471227,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9402, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455129328.820, "dur": 8.099, + "args": { + "External id": 471228,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455129351.536, "dur": 43.071, + "args": { + "External id": 471229,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455129407.354, "dur": 1.840, + "args": { + "External id": 471230,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455129415.204, "dur": 17.060, + "args": { + "External id": 471231,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455129438.784, "dur": 1.547, + "args": { + "External id": 471232,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455129445.121, "dur": 13.399, + "args": { + "External id": 471233,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455129465.217, "dur": 3.410, + "args": { + "External id": 471234,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455129473.447, "dur": 14.751, + "args": { + "External id": 471235,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455129493.105, "dur": 0.910, + "args": { + "External id": 471236,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455129498.805, "dur": 12.913, + "args": { + "External id": 471237,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455129516.533, "dur": 1.455, + "args": { + "External id": 471238,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455129522.400, "dur": 13.646, + "args": { + "External id": 471239,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455129541.255, "dur": 1.254, + "args": { + "External id": 471240,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455129547.144, "dur": 13.171, + "args": { + "External id": 471241,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455129566.020, "dur": 1.346, + "args": { + "External id": 471242,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455129572.257, "dur": 13.284, + "args": { + "External id": 471243,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455129589.945, "dur": 1.243, + "args": { + "External id": 471244,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455129657.130, "dur": 36.203, + "args": { + "External id": 471245,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455129940.111, "dur": 3539.692, + "args": { + "External id": 471246,"Record function id": 0, "Ev Idx": 9421 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2338707, "tid": 2379456, + "ts": 6267455129963.996, "dur": 1337.023, + "args": { + "External id": 471247,"Record function id": 0, "Ev Idx": 9422 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338707, "tid": 2379456, + "ts": 6267455129982.066, "dur": 443.341, + "args": { + "External id": 471248,"Record function id": 0, "Ev Idx": 9423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455130120.012, "dur": 6.600, + "args": { + "External id": 471249,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455130131.202, "dur": 1.374, + "args": { + "External id": 471250,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455130135.101, "dur": 3.907, + "args": { + "External id": 471251,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455130140.693, "dur": 1.202, + "args": { + "External id": 471252,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455130144.470, "dur": 1.056, + "args": { + "External id": 471253,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455130147.062, "dur": 1.074, + "args": { + "External id": 471254,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455130152.643, "dur": 0.998, + "args": { + "External id": 471255,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455130155.433, "dur": 1.625, + "args": { + "External id": 471256,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455130158.609, "dur": 0.597, + "args": { + "External id": 471257,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455130160.806, "dur": 1.060, + "args": { + "External id": 471258,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455130186.366, "dur": 196.330, + "args": { + "External id": 471259,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455130207.025, "dur": 170.275, + "args": { + "External id": 471260,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455130239.699, "dur": 19.728, + "args": { + "External id": 471261,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455130264.013, "dur": 81.173, + "args": { + "External id": 471262,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455130267.302, "dur": 77.502, + "args": { + "External id": 471263,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130272.776, "dur": 6.657, + "args": { + "External id": 471264,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455130282.047, "dur": 61.835, + "args": { + "External id": 471265,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9440 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2338707, "tid": 2379456, + "ts": 6267455130522.723, "dur": 766.062, + "args": { + "External id": 471266,"Record function id": 0, "Ev Idx": 9441 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338707, "tid": 2379456, + "ts": 6267455130545.444, "dur": 726.719, + "args": { + "External id": 471267,"Record function id": 0, "Ev Idx": 9442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455130636.606, "dur": 8.787, + "args": { + "External id": 471268,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455130664.621, "dur": 35.700, + "args": { + "External id": 471269,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130670.928, "dur": 2.105, + "args": { + "External id": 471270,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130675.703, "dur": 0.593, + "args": { + "External id": 471271,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130678.064, "dur": 0.684, + "args": { + "External id": 471272,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130680.448, "dur": 0.987, + "args": { + "External id": 471273,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130683.332, "dur": 0.519, + "args": { + "External id": 471274,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130685.735, "dur": 3.051, + "args": { + "External id": 471275,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130689.957, "dur": 0.759, + "args": { + "External id": 471276,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130693.019, "dur": 0.528, + "args": { + "External id": 471277,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130695.581, "dur": 0.382, + "args": { + "External id": 471278,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455130712.979, "dur": 52.952, + "args": { + "External id": 471279,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455130803.302, "dur": 129.747, + "args": { + "External id": 471280,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455130815.699, "dur": 3.836, + "args": { + "External id": 471281,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455130825.554, "dur": 13.139, + "args": { + "External id": 471282,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455130831.296, "dur": 6.887, + "args": { + "External id": 471283,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130835.593, "dur": 0.801, + "args": { + "External id": 471284,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455130846.422, "dur": 33.086, + "args": { + "External id": 471285,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130848.619, "dur": 0.748, + "args": { + "External id": 471286,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130850.934, "dur": 0.555, + "args": { + "External id": 471287,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130853.703, "dur": 2.728, + "args": { + "External id": 471288,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130858.111, "dur": 0.536, + "args": { + "External id": 471289,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130859.795, "dur": 0.589, + "args": { + "External id": 471290,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130868.893, "dur": 0.385, + "args": { + "External id": 471291,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130871.240, "dur": 0.562, + "args": { + "External id": 471292,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130874.009, "dur": 0.673, + "args": { + "External id": 471293,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455130875.984, "dur": 0.430, + "args": { + "External id": 471294,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455130891.060, "dur": 32.615, + "args": { + "External id": 471295,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455130986.955, "dur": 186.316, + "args": { + "External id": 471296,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455131019.999, "dur": 148.372, + "args": { + "External id": 471297,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9472, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455131032.130, "dur": 130.707, + "args": { + "External id": 471298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455131197.212, "dur": 2.786, + "args": { + "External id": 471299,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9474, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455131309.408, "dur": 2145.593, + "args": { + "External id": 471300,"Sequence number": 5036623, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9475 + } + }, + { + "ph": "f", "id": 420, "pid": 2338707, "tid": 2379456, "ts": 6267455131309.408, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455131443.882, "dur": 134.928, + "args": { + "External id": 471301,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455131650.790, "dur": 51.453, + "args": { + "External id": 471302,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455131723.723, "dur": 66.409, + "args": { + "External id": 471303,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455131804.452, "dur": 38.372, + "args": { + "External id": 471304,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455131849.591, "dur": 37.545, + "args": { + "External id": 471305,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455131894.556, "dur": 31.937, + "args": { + "External id": 471306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455131933.390, "dur": 33.077, + "args": { + "External id": 471307,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455131998.315, "dur": 27.692, + "args": { + "External id": 471308,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455132047.382, "dur": 77.324, + "args": { + "External id": 471309,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455132172.095, "dur": 33.940, + "args": { + "External id": 471310,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455132229.856, "dur": 17.783, + "args": { + "External id": 471311,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455132260.519, "dur": 60.324, + "args": { + "External id": 471312,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455132326.261, "dur": 44.872, + "args": { + "External id": 471313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455132411.213, "dur": 323.254, + "args": { + "External id": 471314,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455132510.517, "dur": 7.644, + "args": { + "External id": 471315,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455132520.745, "dur": 3.622, + "args": { + "External id": 471316,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455132526.196, "dur": 2.568, + "args": { + "External id": 471317,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455132530.513, "dur": 3.362, + "args": { + "External id": 471318,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455132584.699, "dur": 5.953, + "args": { + "External id": 471319,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455132587.040, "dur": 3.416, + "args": { + "External id": 471320,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455132611.007, "dur": 43.160, + "args": { + "External id": 471321,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455132618.279, "dur": 4.976, + "args": { + "External id": 471322,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455132656.124, "dur": 3.170, + "args": { + "External id": 471323,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455132657.764, "dur": 1.394, + "args": { + "External id": 471324,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455132660.618, "dur": 19.763, + "args": { + "External id": 471325,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455132663.524, "dur": 0.670, + "args": { + "External id": 471326,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455132774.630, "dur": 33.377, + "args": { + "External id": 471327,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455132835.732, "dur": 20.513, + "args": { + "External id": 471328,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455132865.981, "dur": 57.556, + "args": { + "External id": 471329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455132932.406, "dur": 46.796, + "args": { + "External id": 471330,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455132993.077, "dur": 27.693, + "args": { + "External id": 471331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455133028.813, "dur": 79.564, + "args": { + "External id": 471332,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455133124.268, "dur": 40.254, + "args": { + "External id": 471333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455133173.817, "dur": 36.574, + "args": { + "External id": 471334,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455133234.779, "dur": 31.993, + "args": { + "External id": 471335,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455133285.516, "dur": 28.199, + "args": { + "External id": 471336,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455133328.663, "dur": 19.713, + "args": { + "External id": 471337,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455133365.148, "dur": 17.194, + "args": { + "External id": 471338,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455133396.758, "dur": 17.806, + "args": { + "External id": 471339,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133506.159, "dur": 18.149, + "args": { + "External id": 471340,"Record function id": 0, "Ev Idx": 9515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133509.971, "dur": 13.175, + "args": { + "External id": 471341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133514.887, "dur": 7.086, + "args": { + "External id": 471342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133517.256, "dur": 4.575, + "args": { + "External id": 471343,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133528.923, "dur": 6.753, + "args": { + "External id": 471344,"Record function id": 0, "Ev Idx": 9519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133530.619, "dur": 4.543, + "args": { + "External id": 471345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133532.043, "dur": 2.574, + "args": { + "External id": 471346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133533.148, "dur": 1.370, + "args": { + "External id": 471347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133539.636, "dur": 5.576, + "args": { + "External id": 471348,"Record function id": 0, "Ev Idx": 9523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133541.716, "dur": 3.022, + "args": { + "External id": 471349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133542.239, "dur": 1.975, + "args": { + "External id": 471350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133542.625, "dur": 1.512, + "args": { + "External id": 471351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133552.323, "dur": 4.545, + "args": { + "External id": 471352,"Record function id": 0, "Ev Idx": 9527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133553.723, "dur": 2.644, + "args": { + "External id": 471353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133554.521, "dur": 1.348, + "args": { + "External id": 471354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133555.043, "dur": 0.752, + "args": { + "External id": 471355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133560.511, "dur": 7.825, + "args": { + "External id": 471356,"Record function id": 0, "Ev Idx": 9531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133562.006, "dur": 5.849, + "args": { + "External id": 471357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133562.587, "dur": 4.688, + "args": { + "External id": 471358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133563.031, "dur": 4.172, + "args": { + "External id": 471359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133572.019, "dur": 4.713, + "args": { + "External id": 471360,"Record function id": 0, "Ev Idx": 9535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133573.486, "dur": 2.717, + "args": { + "External id": 471361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133574.050, "dur": 1.608, + "args": { + "External id": 471362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133574.649, "dur": 0.874, + "args": { + "External id": 471363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133580.561, "dur": 4.129, + "args": { + "External id": 471364,"Record function id": 0, "Ev Idx": 9539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133581.990, "dur": 2.207, + "args": { + "External id": 471365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133582.549, "dur": 1.135, + "args": { + "External id": 471366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133582.879, "dur": 0.719, + "args": { + "External id": 471367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133588.302, "dur": 25.845, + "args": { + "External id": 471368,"Record function id": 0, "Ev Idx": 9543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133589.427, "dur": 23.389, + "args": { + "External id": 471369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133590.281, "dur": 21.202, + "args": { + "External id": 471370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133590.628, "dur": 20.352, + "args": { + "External id": 471371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133620.749, "dur": 6.872, + "args": { + "External id": 471372,"Record function id": 0, "Ev Idx": 9547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455133622.754, "dur": 4.273, + "args": { + "External id": 471373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133623.958, "dur": 2.605, + "args": { + "External id": 471374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455133624.931, "dur": 1.494, + "args": { + "External id": 471375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455133632.558, "dur": 62474.390, + "args": { + "External id": 471376,"Record function id": 0, "Sequence number": 5036622, "Fwd thread id": 1, "Ev Idx": 9551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455133634.150, "dur": 62460.080, + "args": { + "External id": 471377,"Sequence number": 5036622, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9552 + } + }, + { + "ph": "f", "id": 421, "pid": 2338707, "tid": 2379456, "ts": 6267455133634.150, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2338707, "tid": 2379456, + "ts": 6267455133671.828, "dur": 45.465, + "args": { + "External id": 471378,"Record function id": 0, "Ev Idx": 9553 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2338707, "tid": 2379456, + "ts": 6267455133726.758, "dur": 72.927, + "args": { + "External id": 471379,"Record function id": 0, "Ev Idx": 9554 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2338707, "tid": 2379456, + "ts": 6267455133806.347, "dur": 62242.896, + "args": { + "External id": 471380,"Record function id": 0, "Ev Idx": 9555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455133932.418, "dur": 14.926, + "args": { + "External id": 471381,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455133960.000, "dur": 6.250, + "args": { + "External id": 471382,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455133984.223, "dur": 60835.778, + "args": { + "External id": 471383,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455134004.287, "dur": 60799.084, + "args": { + "External id": 471384,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455134164.203, "dur": 25.716, + "args": { + "External id": 471385,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455134217.014, "dur": 60525.679, + "args": { + "External id": 471386,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455134220.423, "dur": 60520.918, + "args": { + "External id": 471387,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455134227.049, "dur": 17.315, + "args": { + "External id": 471388,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455134248.926, "dur": 60485.118, + "args": { + "External id": 471389,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455194965.239, "dur": 15.878, + "args": { + "External id": 471390,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455194970.531, "dur": 10.070, + "args": { + "External id": 471391,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455195024.533, "dur": 548.368, + "args": { + "External id": 471392,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455195113.485, "dur": 451.879, + "args": { + "External id": 471393,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9568, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455195131.450, "dur": 427.183, + "args": { + "External id": 471394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455195614.749, "dur": 4.297, + "args": { + "External id": 471395,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9570, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455195698.530, "dur": 10.562, + "args": { + "External id": 471396,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455195723.766, "dur": 42.622, + "args": { + "External id": 471397,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455195778.586, "dur": 2.072, + "args": { + "External id": 471398,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455195786.328, "dur": 16.525, + "args": { + "External id": 471399,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455195809.361, "dur": 1.089, + "args": { + "External id": 471400,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455195815.495, "dur": 14.536, + "args": { + "External id": 471401,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455195835.641, "dur": 0.985, + "args": { + "External id": 471402,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455195840.919, "dur": 13.730, + "args": { + "External id": 471403,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455195859.747, "dur": 0.883, + "args": { + "External id": 471404,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455195864.783, "dur": 12.135, + "args": { + "External id": 471405,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455195882.425, "dur": 1.382, + "args": { + "External id": 471406,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455195887.799, "dur": 14.233, + "args": { + "External id": 471407,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455195906.814, "dur": 0.977, + "args": { + "External id": 471408,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455195914.736, "dur": 13.344, + "args": { + "External id": 471409,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455195932.941, "dur": 1.249, + "args": { + "External id": 471410,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455195939.126, "dur": 14.639, + "args": { + "External id": 471411,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455195958.035, "dur": 3.391, + "args": { + "External id": 471412,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455195965.325, "dur": 12.782, + "args": { + "External id": 471413,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455196127.061, "dur": 3487.841, + "args": { + "External id": 471414,"Record function id": 0, "Ev Idx": 9589 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2338707, "tid": 2379456, + "ts": 6267455196157.937, "dur": 1258.055, + "args": { + "External id": 471415,"Record function id": 0, "Ev Idx": 9590 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338707, "tid": 2379456, + "ts": 6267455196175.286, "dur": 388.074, + "args": { + "External id": 471416,"Record function id": 0, "Ev Idx": 9591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455196273.887, "dur": 5.816, + "args": { + "External id": 471417,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455196283.615, "dur": 1.122, + "args": { + "External id": 471418,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455196287.914, "dur": 1.072, + "args": { + "External id": 471419,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455196290.891, "dur": 1.099, + "args": { + "External id": 471420,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455196294.487, "dur": 0.783, + "args": { + "External id": 471421,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455196299.265, "dur": 1.070, + "args": { + "External id": 471422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455196302.204, "dur": 0.833, + "args": { + "External id": 471423,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455196304.652, "dur": 3.824, + "args": { + "External id": 471424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455196310.135, "dur": 0.897, + "args": { + "External id": 471425,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455196315.275, "dur": 0.835, + "args": { + "External id": 471426,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455196337.806, "dur": 188.903, + "args": { + "External id": 471427,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455196357.798, "dur": 163.338, + "args": { + "External id": 471428,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455196388.560, "dur": 16.226, + "args": { + "External id": 471429,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455196409.396, "dur": 79.660, + "args": { + "External id": 471430,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455196412.724, "dur": 75.832, + "args": { + "External id": 471431,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196417.877, "dur": 6.418, + "args": { + "External id": 471432,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455196426.713, "dur": 61.109, + "args": { + "External id": 471433,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9608 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2338707, "tid": 2379456, + "ts": 6267455196681.950, "dur": 724.744, + "args": { + "External id": 471434,"Record function id": 0, "Ev Idx": 9609 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338707, "tid": 2379456, + "ts": 6267455196703.248, "dur": 688.623, + "args": { + "External id": 471435,"Record function id": 0, "Ev Idx": 9610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455196774.213, "dur": 7.685, + "args": { + "External id": 471436,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455196800.028, "dur": 35.826, + "args": { + "External id": 471437,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196807.430, "dur": 2.128, + "args": { + "External id": 471438,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196811.762, "dur": 0.583, + "args": { + "External id": 471439,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196814.296, "dur": 3.600, + "args": { + "External id": 471440,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196820.050, "dur": 0.453, + "args": { + "External id": 471441,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196821.766, "dur": 0.463, + "args": { + "External id": 471442,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196824.440, "dur": 0.516, + "args": { + "External id": 471443,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196826.737, "dur": 0.606, + "args": { + "External id": 471444,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196828.646, "dur": 0.515, + "args": { + "External id": 471445,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196831.112, "dur": 0.474, + "args": { + "External id": 471446,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455196850.649, "dur": 49.380, + "args": { + "External id": 471447,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455196938.323, "dur": 169.938, + "args": { + "External id": 471448,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455196950.584, "dur": 3.381, + "args": { + "External id": 471449,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455196960.014, "dur": 14.655, + "args": { + "External id": 471450,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455196965.450, "dur": 8.736, + "args": { + "External id": 471451,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196969.409, "dur": 3.260, + "args": { + "External id": 471452,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455196982.520, "dur": 27.171, + "args": { + "External id": 471453,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196985.006, "dur": 0.555, + "args": { + "External id": 471454,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196987.636, "dur": 0.699, + "args": { + "External id": 471455,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196990.544, "dur": 0.557, + "args": { + "External id": 471456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196993.577, "dur": 0.714, + "args": { + "External id": 471457,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196995.644, "dur": 0.715, + "args": { + "External id": 471458,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455196998.389, "dur": 0.505, + "args": { + "External id": 471459,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455197000.369, "dur": 0.434, + "args": { + "External id": 471460,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455197001.971, "dur": 2.501, + "args": { + "External id": 471461,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455197006.375, "dur": 0.536, + "args": { + "External id": 471462,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455197020.631, "dur": 75.951, + "args": { + "External id": 471463,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455197167.158, "dur": 141.087, + "args": { + "External id": 471464,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455197200.025, "dur": 103.858, + "args": { + "External id": 471465,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9640, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455197211.805, "dur": 87.329, + "args": { + "External id": 471466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455197326.092, "dur": 2.437, + "args": { + "External id": 471467,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9642, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455197424.372, "dur": 2148.766, + "args": { + "External id": 471468,"Sequence number": 5036621, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9643 + } + }, + { + "ph": "f", "id": 422, "pid": 2338707, "tid": 2379456, "ts": 6267455197424.372, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455197555.602, "dur": 148.860, + "args": { + "External id": 471469,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455197757.765, "dur": 48.875, + "args": { + "External id": 471470,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455197829.349, "dur": 63.715, + "args": { + "External id": 471471,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455197904.940, "dur": 37.800, + "args": { + "External id": 471472,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455197950.332, "dur": 37.595, + "args": { + "External id": 471473,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455197995.651, "dur": 32.138, + "args": { + "External id": 471474,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455198037.539, "dur": 77.472, + "args": { + "External id": 471475,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455198150.609, "dur": 30.055, + "args": { + "External id": 471476,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455198204.099, "dur": 33.918, + "args": { + "External id": 471477,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455198264.023, "dur": 24.572, + "args": { + "External id": 471478,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455198305.606, "dur": 18.626, + "args": { + "External id": 471479,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455198337.572, "dur": 51.593, + "args": { + "External id": 471480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455198393.753, "dur": 38.788, + "args": { + "External id": 471481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455198466.696, "dur": 356.266, + "args": { + "External id": 471482,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455198561.553, "dur": 7.549, + "args": { + "External id": 471483,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455198571.810, "dur": 15.422, + "args": { + "External id": 471484,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455198608.075, "dur": 8.301, + "args": { + "External id": 471485,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455198618.582, "dur": 2.513, + "args": { + "External id": 471486,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455198683.027, "dur": 6.142, + "args": { + "External id": 471487,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455198685.324, "dur": 3.648, + "args": { + "External id": 471488,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455198691.531, "dur": 44.420, + "args": { + "External id": 471489,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455198698.755, "dur": 2.151, + "args": { + "External id": 471490,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455198737.963, "dur": 2.373, + "args": { + "External id": 471491,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455198739.357, "dur": 0.882, + "args": { + "External id": 471492,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455198741.879, "dur": 15.860, + "args": { + "External id": 471493,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455198744.344, "dur": 0.423, + "args": { + "External id": 471494,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455198873.238, "dur": 35.643, + "args": { + "External id": 471495,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455198930.397, "dur": 19.913, + "args": { + "External id": 471496,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455198960.356, "dur": 62.635, + "args": { + "External id": 471497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455199032.414, "dur": 94.332, + "args": { + "External id": 471498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455199145.413, "dur": 32.467, + "args": { + "External id": 471499,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455199186.082, "dur": 37.000, + "args": { + "External id": 471500,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455199232.656, "dur": 33.559, + "args": { + "External id": 471501,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455199274.600, "dur": 35.302, + "args": { + "External id": 471502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455199332.905, "dur": 29.363, + "args": { + "External id": 471503,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455199380.504, "dur": 29.043, + "args": { + "External id": 471504,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455199425.847, "dur": 20.193, + "args": { + "External id": 471505,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455199477.447, "dur": 17.364, + "args": { + "External id": 471506,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455199513.074, "dur": 18.978, + "args": { + "External id": 471507,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199645.629, "dur": 19.758, + "args": { + "External id": 471508,"Record function id": 0, "Ev Idx": 9683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199649.727, "dur": 14.385, + "args": { + "External id": 471509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199654.917, "dur": 7.499, + "args": { + "External id": 471510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199657.221, "dur": 5.038, + "args": { + "External id": 471511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199670.010, "dur": 5.635, + "args": { + "External id": 471512,"Record function id": 0, "Ev Idx": 9687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199671.441, "dur": 3.671, + "args": { + "External id": 471513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199672.164, "dur": 2.290, + "args": { + "External id": 471514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199673.245, "dur": 1.108, + "args": { + "External id": 471515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199679.618, "dur": 4.996, + "args": { + "External id": 471516,"Record function id": 0, "Ev Idx": 9691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199681.000, "dur": 3.133, + "args": { + "External id": 471517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199681.682, "dur": 1.816, + "args": { + "External id": 471518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199682.065, "dur": 1.359, + "args": { + "External id": 471519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199688.365, "dur": 7.336, + "args": { + "External id": 471520,"Record function id": 0, "Ev Idx": 9695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199689.831, "dur": 5.374, + "args": { + "External id": 471521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199690.769, "dur": 3.924, + "args": { + "External id": 471522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199691.325, "dur": 3.284, + "args": { + "External id": 471523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199699.489, "dur": 4.521, + "args": { + "External id": 471524,"Record function id": 0, "Ev Idx": 9699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199700.770, "dur": 2.747, + "args": { + "External id": 471525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199701.390, "dur": 1.552, + "args": { + "External id": 471526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199701.992, "dur": 0.873, + "args": { + "External id": 471527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199707.743, "dur": 4.534, + "args": { + "External id": 471528,"Record function id": 0, "Ev Idx": 9703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199708.817, "dur": 2.951, + "args": { + "External id": 471529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199709.572, "dur": 1.644, + "args": { + "External id": 471530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199710.385, "dur": 0.693, + "args": { + "External id": 471531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199715.990, "dur": 3.715, + "args": { + "External id": 471532,"Record function id": 0, "Ev Idx": 9707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199717.112, "dur": 2.113, + "args": { + "External id": 471533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199717.683, "dur": 1.062, + "args": { + "External id": 471534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199718.027, "dur": 0.641, + "args": { + "External id": 471535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199723.630, "dur": 4.075, + "args": { + "External id": 471536,"Record function id": 0, "Ev Idx": 9711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199725.018, "dur": 2.145, + "args": { + "External id": 471537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199725.541, "dur": 1.141, + "args": { + "External id": 471538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199725.853, "dur": 0.714, + "args": { + "External id": 471539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199731.776, "dur": 4.370, + "args": { + "External id": 471540,"Record function id": 0, "Ev Idx": 9715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455199733.132, "dur": 2.561, + "args": { + "External id": 471541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199733.648, "dur": 1.523, + "args": { + "External id": 471542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455199734.188, "dur": 0.853, + "args": { + "External id": 471543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455199741.131, "dur": 63489.644, + "args": { + "External id": 471544,"Record function id": 0, "Sequence number": 5036620, "Fwd thread id": 1, "Ev Idx": 9719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455199742.867, "dur": 63476.087, + "args": { + "External id": 471545,"Sequence number": 5036620, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9720 + } + }, + { + "ph": "f", "id": 423, "pid": 2338707, "tid": 2379456, "ts": 6267455199742.867, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2338707, "tid": 2379456, + "ts": 6267455199778.534, "dur": 42.919, + "args": { + "External id": 471546,"Record function id": 0, "Ev Idx": 9721 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2338707, "tid": 2379456, + "ts": 6267455199829.990, "dur": 74.372, + "args": { + "External id": 471547,"Record function id": 0, "Ev Idx": 9722 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2338707, "tid": 2379456, + "ts": 6267455199911.272, "dur": 63298.501, + "args": { + "External id": 471548,"Record function id": 0, "Ev Idx": 9723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455200017.992, "dur": 7.631, + "args": { + "External id": 471549,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455200037.680, "dur": 5.679, + "args": { + "External id": 471550,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455200117.485, "dur": 61924.161, + "args": { + "External id": 471551,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455200136.137, "dur": 61889.078, + "args": { + "External id": 471552,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455200289.657, "dur": 34.060, + "args": { + "External id": 471553,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455200357.074, "dur": 61613.401, + "args": { + "External id": 471554,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455200360.349, "dur": 61608.012, + "args": { + "External id": 471555,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455200372.794, "dur": 17.433, + "args": { + "External id": 471556,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455200393.081, "dur": 61572.228, + "args": { + "External id": 471557,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455262200.685, "dur": 13.821, + "args": { + "External id": 471558,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455262204.816, "dur": 9.054, + "args": { + "External id": 471559,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455262250.150, "dur": 449.416, + "args": { + "External id": 471560,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455262286.050, "dur": 406.845, + "args": { + "External id": 471561,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9736, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455262300.526, "dur": 385.439, + "args": { + "External id": 471562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455262725.377, "dur": 2.771, + "args": { + "External id": 471563,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9738, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455262800.818, "dur": 7.887, + "args": { + "External id": 471564,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455262823.258, "dur": 43.327, + "args": { + "External id": 471565,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455262878.716, "dur": 1.849, + "args": { + "External id": 471566,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455262886.630, "dur": 16.245, + "args": { + "External id": 471567,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455262910.141, "dur": 1.239, + "args": { + "External id": 471568,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455262915.905, "dur": 13.428, + "args": { + "External id": 471569,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455262935.050, "dur": 1.197, + "args": { + "External id": 471570,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455262940.812, "dur": 14.885, + "args": { + "External id": 471571,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455262961.514, "dur": 1.162, + "args": { + "External id": 471572,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455262966.879, "dur": 12.712, + "args": { + "External id": 471573,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455262984.532, "dur": 1.422, + "args": { + "External id": 471574,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455262990.082, "dur": 12.482, + "args": { + "External id": 471575,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263007.572, "dur": 3.759, + "args": { + "External id": 471576,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455263017.848, "dur": 11.432, + "args": { + "External id": 471577,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263034.650, "dur": 1.054, + "args": { + "External id": 471578,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455263039.898, "dur": 63.981, + "args": { + "External id": 471579,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263114.440, "dur": 1.817, + "args": { + "External id": 471580,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455263121.019, "dur": 16.004, + "args": { + "External id": 471581,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455263249.840, "dur": 3474.301, + "args": { + "External id": 471582,"Record function id": 0, "Ev Idx": 9757 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2338707, "tid": 2379456, + "ts": 6267455263273.715, "dur": 1250.293, + "args": { + "External id": 471583,"Record function id": 0, "Ev Idx": 9758 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338707, "tid": 2379456, + "ts": 6267455263290.191, "dur": 428.361, + "args": { + "External id": 471584,"Record function id": 0, "Ev Idx": 9759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455263386.679, "dur": 5.179, + "args": { + "External id": 471585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455263396.000, "dur": 1.073, + "args": { + "External id": 471586,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455263399.069, "dur": 0.815, + "args": { + "External id": 471587,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455263402.267, "dur": 1.088, + "args": { + "External id": 471588,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455263404.995, "dur": 0.978, + "args": { + "External id": 471589,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455263410.194, "dur": 3.413, + "args": { + "External id": 471590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455263415.531, "dur": 1.130, + "args": { + "External id": 471591,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455263421.727, "dur": 2.020, + "args": { + "External id": 471592,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455263425.463, "dur": 0.728, + "args": { + "External id": 471593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455263430.333, "dur": 1.013, + "args": { + "External id": 471594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455263457.251, "dur": 219.222, + "args": { + "External id": 471595,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455263475.561, "dur": 194.015, + "args": { + "External id": 471596,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455263504.506, "dur": 21.690, + "args": { + "External id": 471597,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455263530.592, "dur": 105.517, + "args": { + "External id": 471598,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455263533.720, "dur": 101.875, + "args": { + "External id": 471599,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263538.905, "dur": 6.490, + "args": { + "External id": 471600,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455263547.673, "dur": 86.808, + "args": { + "External id": 471601,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9776 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2338707, "tid": 2379456, + "ts": 6267455263809.066, "dur": 705.525, + "args": { + "External id": 471602,"Record function id": 0, "Ev Idx": 9777 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338707, "tid": 2379456, + "ts": 6267455263829.225, "dur": 670.586, + "args": { + "External id": 471603,"Record function id": 0, "Ev Idx": 9778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455263897.738, "dur": 6.588, + "args": { + "External id": 471604,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455263921.928, "dur": 36.674, + "args": { + "External id": 471605,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263928.971, "dur": 4.012, + "args": { + "External id": 471606,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263935.372, "dur": 0.704, + "args": { + "External id": 471607,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263938.362, "dur": 0.779, + "args": { + "External id": 471608,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263940.715, "dur": 0.746, + "args": { + "External id": 471609,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263942.595, "dur": 0.450, + "args": { + "External id": 471610,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263944.992, "dur": 0.530, + "args": { + "External id": 471611,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263947.378, "dur": 0.415, + "args": { + "External id": 471612,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263949.101, "dur": 0.468, + "args": { + "External id": 471613,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455263951.275, "dur": 3.049, + "args": { + "External id": 471614,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455263970.348, "dur": 47.103, + "args": { + "External id": 471615,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455264052.402, "dur": 178.314, + "args": { + "External id": 471616,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455264108.347, "dur": 5.748, + "args": { + "External id": 471617,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455264121.009, "dur": 13.142, + "args": { + "External id": 471618,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455264126.276, "dur": 7.386, + "args": { + "External id": 471619,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455264130.709, "dur": 0.877, + "args": { + "External id": 471620,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455264143.594, "dur": 28.688, + "args": { + "External id": 471621,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455264146.800, "dur": 1.043, + "args": { + "External id": 471622,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455264149.672, "dur": 0.868, + "args": { + "External id": 471623,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455264152.904, "dur": 0.632, + "args": { + "External id": 471624,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455264155.201, "dur": 0.442, + "args": { + "External id": 471625,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455264156.870, "dur": 0.489, + "args": { + "External id": 471626,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455264159.297, "dur": 3.132, + "args": { + "External id": 471627,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455264164.324, "dur": 0.414, + "args": { + "External id": 471628,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455264165.821, "dur": 0.483, + "args": { + "External id": 471629,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455264168.434, "dur": 0.493, + "args": { + "External id": 471630,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455264185.012, "dur": 35.896, + "args": { + "External id": 471631,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455264285.113, "dur": 132.585, + "args": { + "External id": 471632,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455264313.725, "dur": 99.846, + "args": { + "External id": 471633,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9808, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455264325.206, "dur": 83.424, + "args": { + "External id": 471634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455264439.116, "dur": 2.065, + "args": { + "External id": 471635,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9810, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455264532.578, "dur": 2166.449, + "args": { + "External id": 471636,"Sequence number": 5036619, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9811 + } + }, + { + "ph": "f", "id": 424, "pid": 2338707, "tid": 2379456, "ts": 6267455264532.578, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455264683.176, "dur": 130.895, + "args": { + "External id": 471637,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455264863.549, "dur": 47.642, + "args": { + "External id": 471638,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455264930.596, "dur": 61.586, + "args": { + "External id": 471639,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455265003.587, "dur": 41.012, + "args": { + "External id": 471640,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455265052.566, "dur": 90.330, + "args": { + "External id": 471641,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455265159.251, "dur": 42.384, + "args": { + "External id": 471642,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455265210.395, "dur": 38.896, + "args": { + "External id": 471643,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455265279.239, "dur": 31.842, + "args": { + "External id": 471644,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455265334.023, "dur": 36.363, + "args": { + "External id": 471645,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455265394.650, "dur": 24.089, + "args": { + "External id": 471646,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455265440.739, "dur": 18.628, + "args": { + "External id": 471647,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455265469.541, "dur": 50.229, + "args": { + "External id": 471648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455265524.271, "dur": 40.049, + "args": { + "External id": 471649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455265616.146, "dur": 345.569, + "args": { + "External id": 471650,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455265736.513, "dur": 12.235, + "args": { + "External id": 471651,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455265751.970, "dur": 3.630, + "args": { + "External id": 471652,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455265757.364, "dur": 6.058, + "args": { + "External id": 471653,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455265764.890, "dur": 2.705, + "args": { + "External id": 471654,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455265829.954, "dur": 6.273, + "args": { + "External id": 471655,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455265832.418, "dur": 3.558, + "args": { + "External id": 471656,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455265838.197, "dur": 39.530, + "args": { + "External id": 471657,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455265845.121, "dur": 2.075, + "args": { + "External id": 471658,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455265879.425, "dur": 10.073, + "args": { + "External id": 471659,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455265888.577, "dur": 0.826, + "args": { + "External id": 471660,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455265890.848, "dur": 19.854, + "args": { + "External id": 471661,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455265893.927, "dur": 0.783, + "args": { + "External id": 471662,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455266001.568, "dur": 34.664, + "args": { + "External id": 471663,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455266095.288, "dur": 26.304, + "args": { + "External id": 471664,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455266134.648, "dur": 63.348, + "args": { + "External id": 471665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455266209.928, "dur": 49.021, + "args": { + "External id": 471666,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455266270.253, "dur": 26.096, + "args": { + "External id": 471667,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455266303.992, "dur": 37.410, + "args": { + "External id": 471668,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455266350.291, "dur": 33.258, + "args": { + "External id": 471669,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455266391.441, "dur": 35.421, + "args": { + "External id": 471670,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455266451.900, "dur": 31.648, + "args": { + "External id": 471671,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455266500.783, "dur": 29.399, + "args": { + "External id": 471672,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455266545.746, "dur": 21.247, + "args": { + "External id": 471673,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455266583.798, "dur": 37.801, + "args": { + "External id": 471674,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455266640.666, "dur": 20.709, + "args": { + "External id": 471675,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266750.658, "dur": 18.695, + "args": { + "External id": 471676,"Record function id": 0, "Ev Idx": 9851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266754.734, "dur": 13.345, + "args": { + "External id": 471677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266759.835, "dur": 7.130, + "args": { + "External id": 471678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266761.989, "dur": 4.837, + "args": { + "External id": 471679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266774.136, "dur": 9.426, + "args": { + "External id": 471680,"Record function id": 0, "Ev Idx": 9855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266775.923, "dur": 7.157, + "args": { + "External id": 471681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266776.842, "dur": 5.603, + "args": { + "External id": 471682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266778.356, "dur": 3.946, + "args": { + "External id": 471683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266787.643, "dur": 5.339, + "args": { + "External id": 471684,"Record function id": 0, "Ev Idx": 9859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266788.896, "dur": 3.597, + "args": { + "External id": 471685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266789.791, "dur": 2.163, + "args": { + "External id": 471686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266790.352, "dur": 1.523, + "args": { + "External id": 471687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266796.805, "dur": 4.525, + "args": { + "External id": 471688,"Record function id": 0, "Ev Idx": 9863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266798.023, "dur": 2.816, + "args": { + "External id": 471689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266798.732, "dur": 1.571, + "args": { + "External id": 471690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266799.266, "dur": 0.959, + "args": { + "External id": 471691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266804.944, "dur": 5.154, + "args": { + "External id": 471692,"Record function id": 0, "Ev Idx": 9867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266806.732, "dur": 2.907, + "args": { + "External id": 471693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266807.296, "dur": 1.443, + "args": { + "External id": 471694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266807.755, "dur": 0.901, + "args": { + "External id": 471695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266813.966, "dur": 5.042, + "args": { + "External id": 471696,"Record function id": 0, "Ev Idx": 9871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266815.678, "dur": 2.799, + "args": { + "External id": 471697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266816.285, "dur": 1.564, + "args": { + "External id": 471698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266816.864, "dur": 0.866, + "args": { + "External id": 471699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266822.897, "dur": 4.188, + "args": { + "External id": 471700,"Record function id": 0, "Ev Idx": 9875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266824.252, "dur": 2.325, + "args": { + "External id": 471701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266824.760, "dur": 1.337, + "args": { + "External id": 471702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266825.217, "dur": 0.808, + "args": { + "External id": 471703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266830.822, "dur": 4.995, + "args": { + "External id": 471704,"Record function id": 0, "Ev Idx": 9879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266832.292, "dur": 3.029, + "args": { + "External id": 471705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266833.250, "dur": 1.551, + "args": { + "External id": 471706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266833.763, "dur": 0.937, + "args": { + "External id": 471707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266839.957, "dur": 6.827, + "args": { + "External id": 471708,"Record function id": 0, "Ev Idx": 9883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455266841.011, "dur": 5.311, + "args": { + "External id": 471709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266841.554, "dur": 3.934, + "args": { + "External id": 471710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455266842.055, "dur": 3.302, + "args": { + "External id": 471711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455266851.575, "dur": 62833.810, + "args": { + "External id": 471712,"Record function id": 0, "Sequence number": 5036618, "Fwd thread id": 1, "Ev Idx": 9887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455266853.008, "dur": 62821.295, + "args": { + "External id": 471713,"Sequence number": 5036618, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9888 + } + }, + { + "ph": "f", "id": 425, "pid": 2338707, "tid": 2379456, "ts": 6267455266853.008, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2338707, "tid": 2379456, + "ts": 6267455266887.445, "dur": 45.744, + "args": { + "External id": 471714,"Record function id": 0, "Ev Idx": 9889 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2338707, "tid": 2379456, + "ts": 6267455266942.123, "dur": 72.355, + "args": { + "External id": 471715,"Record function id": 0, "Ev Idx": 9890 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2338707, "tid": 2379456, + "ts": 6267455267021.485, "dur": 62642.784, + "args": { + "External id": 471716,"Record function id": 0, "Ev Idx": 9891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455267177.122, "dur": 8.553, + "args": { + "External id": 471717,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455267198.701, "dur": 6.138, + "args": { + "External id": 471718,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455267226.307, "dur": 61377.666, + "args": { + "External id": 471719,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455267242.911, "dur": 61333.880, + "args": { + "External id": 471720,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455267351.215, "dur": 20.652, + "args": { + "External id": 471721,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455267409.711, "dur": 61117.067, + "args": { + "External id": 471722,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455267413.354, "dur": 61112.317, + "args": { + "External id": 471723,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455267420.533, "dur": 14.646, + "args": { + "External id": 471724,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455267438.062, "dur": 61084.634, + "args": { + "External id": 471725,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455328734.579, "dur": 14.005, + "args": { + "External id": 471726,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455328738.655, "dur": 9.435, + "args": { + "External id": 471727,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455328785.893, "dur": 406.071, + "args": { + "External id": 471728,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455328821.600, "dur": 363.202, + "args": { + "External id": 471729,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9904, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455328837.530, "dur": 339.371, + "args": { + "External id": 471730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455329219.840, "dur": 2.922, + "args": { + "External id": 471731,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9906, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455329297.124, "dur": 7.589, + "args": { + "External id": 471732,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455329319.160, "dur": 41.658, + "args": { + "External id": 471733,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455329376.678, "dur": 1.909, + "args": { + "External id": 471734,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455329384.780, "dur": 15.175, + "args": { + "External id": 471735,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455329406.298, "dur": 1.171, + "args": { + "External id": 471736,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455329412.318, "dur": 14.637, + "args": { + "External id": 471737,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455329432.186, "dur": 3.232, + "args": { + "External id": 471738,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455329440.443, "dur": 12.972, + "args": { + "External id": 471739,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455329458.235, "dur": 0.993, + "args": { + "External id": 471740,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455329463.964, "dur": 13.110, + "args": { + "External id": 471741,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455329481.877, "dur": 1.381, + "args": { + "External id": 471742,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455329488.022, "dur": 13.389, + "args": { + "External id": 471743,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455329506.122, "dur": 1.399, + "args": { + "External id": 471744,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455329512.542, "dur": 13.063, + "args": { + "External id": 471745,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455329530.225, "dur": 1.219, + "args": { + "External id": 471746,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455329535.927, "dur": 14.357, + "args": { + "External id": 471747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455329554.797, "dur": 1.140, + "args": { + "External id": 471748,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455329560.093, "dur": 13.176, + "args": { + "External id": 471749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455329704.552, "dur": 3550.532, + "args": { + "External id": 471750,"Record function id": 0, "Ev Idx": 9925 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2338707, "tid": 2379456, + "ts": 6267455329728.880, "dur": 1285.782, + "args": { + "External id": 471751,"Record function id": 0, "Ev Idx": 9926 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338707, "tid": 2379456, + "ts": 6267455329749.197, "dur": 446.510, + "args": { + "External id": 471752,"Record function id": 0, "Ev Idx": 9927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455329844.767, "dur": 5.612, + "args": { + "External id": 471753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455329854.880, "dur": 1.065, + "args": { + "External id": 471754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455329858.275, "dur": 3.259, + "args": { + "External id": 471755,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455329863.371, "dur": 0.939, + "args": { + "External id": 471756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455329866.324, "dur": 0.865, + "args": { + "External id": 471757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455329870.723, "dur": 1.082, + "args": { + "External id": 471758,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455329873.408, "dur": 0.927, + "args": { + "External id": 471759,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455329876.446, "dur": 2.000, + "args": { + "External id": 471760,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455329880.180, "dur": 0.817, + "args": { + "External id": 471761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455329884.904, "dur": 0.738, + "args": { + "External id": 471762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455329905.963, "dur": 246.534, + "args": { + "External id": 471763,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455329924.595, "dur": 220.712, + "args": { + "External id": 471764,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455329955.183, "dur": 19.676, + "args": { + "External id": 471765,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455329979.363, "dur": 129.510, + "args": { + "External id": 471766,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455329982.651, "dur": 125.678, + "args": { + "External id": 471767,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455329988.002, "dur": 7.191, + "args": { + "External id": 471768,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455329997.424, "dur": 109.269, + "args": { + "External id": 471769,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9944 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2338707, "tid": 2379456, + "ts": 6267455330294.565, "dur": 707.788, + "args": { + "External id": 471770,"Record function id": 0, "Ev Idx": 9945 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338707, "tid": 2379456, + "ts": 6267455330316.032, "dur": 671.028, + "args": { + "External id": 471771,"Record function id": 0, "Ev Idx": 9946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455330385.367, "dur": 7.098, + "args": { + "External id": 471772,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455330411.709, "dur": 36.066, + "args": { + "External id": 471773,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330418.032, "dur": 2.187, + "args": { + "External id": 471774,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330422.314, "dur": 0.617, + "args": { + "External id": 471775,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330425.255, "dur": 0.757, + "args": { + "External id": 471776,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330427.993, "dur": 0.819, + "args": { + "External id": 471777,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330430.472, "dur": 0.596, + "args": { + "External id": 471778,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330433.319, "dur": 3.042, + "args": { + "External id": 471779,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330438.325, "dur": 0.602, + "args": { + "External id": 471780,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330440.040, "dur": 0.631, + "args": { + "External id": 471781,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330442.683, "dur": 0.711, + "args": { + "External id": 471782,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455330460.159, "dur": 50.016, + "args": { + "External id": 471783,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455330544.758, "dur": 157.994, + "args": { + "External id": 471784,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455330557.188, "dur": 4.221, + "args": { + "External id": 471785,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455330567.257, "dur": 12.284, + "args": { + "External id": 471786,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455330572.781, "dur": 6.304, + "args": { + "External id": 471787,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330576.718, "dur": 0.816, + "args": { + "External id": 471788,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455330610.071, "dur": 30.145, + "args": { + "External id": 471789,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330614.080, "dur": 0.989, + "args": { + "External id": 471790,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330617.292, "dur": 0.868, + "args": { + "External id": 471791,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330620.378, "dur": 2.874, + "args": { + "External id": 471792,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330624.440, "dur": 0.512, + "args": { + "External id": 471793,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330626.814, "dur": 0.505, + "args": { + "External id": 471794,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330629.134, "dur": 0.730, + "args": { + "External id": 471795,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330631.373, "dur": 0.538, + "args": { + "External id": 471796,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330633.938, "dur": 0.425, + "args": { + "External id": 471797,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455330636.418, "dur": 0.389, + "args": { + "External id": 471798,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455330654.215, "dur": 39.196, + "args": { + "External id": 471799,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455330756.691, "dur": 147.006, + "args": { + "External id": 471800,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455330792.267, "dur": 107.549, + "args": { + "External id": 471801,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9976, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455330803.941, "dur": 90.845, + "args": { + "External id": 471802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455330925.073, "dur": 2.246, + "args": { + "External id": 471803,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9978, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455331023.553, "dur": 2206.284, + "args": { + "External id": 471804,"Sequence number": 5036617, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9979 + } + }, + { + "ph": "f", "id": 426, "pid": 2338707, "tid": 2379456, "ts": 6267455331023.553, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455331206.497, "dur": 129.879, + "args": { + "External id": 471805,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455331386.340, "dur": 50.982, + "args": { + "External id": 471806,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455331457.978, "dur": 63.598, + "args": { + "External id": 471807,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455331536.274, "dur": 37.424, + "args": { + "External id": 471808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455331581.068, "dur": 57.748, + "args": { + "External id": 471809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455331650.696, "dur": 35.665, + "args": { + "External id": 471810,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455331694.862, "dur": 33.773, + "args": { + "External id": 471811,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455331764.845, "dur": 30.748, + "args": { + "External id": 471812,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455331818.229, "dur": 35.477, + "args": { + "External id": 471813,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455331879.880, "dur": 24.903, + "args": { + "External id": 471814,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455331921.776, "dur": 18.776, + "args": { + "External id": 471815,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455331950.576, "dur": 44.528, + "args": { + "External id": 471816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455331999.650, "dur": 38.687, + "args": { + "External id": 471817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455332124.139, "dur": 340.974, + "args": { + "External id": 471818,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455332231.697, "dur": 10.404, + "args": { + "External id": 471819,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455332245.491, "dur": 4.149, + "args": { + "External id": 471820,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455332251.247, "dur": 2.591, + "args": { + "External id": 471821,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455332255.433, "dur": 3.216, + "args": { + "External id": 471822,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455332316.896, "dur": 6.887, + "args": { + "External id": 471823,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455332319.697, "dur": 3.826, + "args": { + "External id": 471824,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455332333.066, "dur": 45.581, + "args": { + "External id": 471825,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455332339.910, "dur": 4.664, + "args": { + "External id": 471826,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455332380.337, "dur": 2.373, + "args": { + "External id": 471827,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455332381.820, "dur": 0.804, + "args": { + "External id": 471828,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455332383.870, "dur": 18.923, + "args": { + "External id": 471829,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455332386.328, "dur": 0.896, + "args": { + "External id": 471830,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455332507.881, "dur": 34.700, + "args": { + "External id": 471831,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455332565.571, "dur": 21.779, + "args": { + "External id": 471832,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455332617.681, "dur": 66.834, + "args": { + "External id": 471833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455332694.086, "dur": 50.612, + "args": { + "External id": 471834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455332760.211, "dur": 25.744, + "args": { + "External id": 471835,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455332794.066, "dur": 37.620, + "args": { + "External id": 471836,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455332840.717, "dur": 35.235, + "args": { + "External id": 471837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455332884.427, "dur": 36.147, + "args": { + "External id": 471838,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455332944.023, "dur": 29.935, + "args": { + "External id": 471839,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455332993.508, "dur": 31.731, + "args": { + "External id": 471840,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455333041.299, "dur": 66.365, + "args": { + "External id": 471841,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455333132.183, "dur": 22.494, + "args": { + "External id": 471842,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455333170.508, "dur": 18.973, + "args": { + "External id": 471843,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333281.861, "dur": 18.566, + "args": { + "External id": 471844,"Record function id": 0, "Ev Idx": 10019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333285.609, "dur": 13.612, + "args": { + "External id": 471845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333290.675, "dur": 7.462, + "args": { + "External id": 471846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333293.167, "dur": 4.821, + "args": { + "External id": 471847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333304.979, "dur": 6.214, + "args": { + "External id": 471848,"Record function id": 0, "Ev Idx": 10023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333306.501, "dur": 4.197, + "args": { + "External id": 471849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333307.320, "dur": 2.669, + "args": { + "External id": 471850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333308.561, "dur": 1.309, + "args": { + "External id": 471851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333315.020, "dur": 5.393, + "args": { + "External id": 471852,"Record function id": 0, "Ev Idx": 10027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333316.469, "dur": 3.451, + "args": { + "External id": 471853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333317.406, "dur": 1.958, + "args": { + "External id": 471854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333318.211, "dur": 1.074, + "args": { + "External id": 471855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333324.300, "dur": 4.534, + "args": { + "External id": 471856,"Record function id": 0, "Ev Idx": 10031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333325.583, "dur": 2.778, + "args": { + "External id": 471857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333326.190, "dur": 1.575, + "args": { + "External id": 471858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333326.865, "dur": 0.819, + "args": { + "External id": 471859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333332.504, "dur": 6.344, + "args": { + "External id": 471860,"Record function id": 0, "Ev Idx": 10035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333333.732, "dur": 4.634, + "args": { + "External id": 471861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333334.266, "dur": 3.587, + "args": { + "External id": 471862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333334.606, "dur": 3.136, + "args": { + "External id": 471863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333342.601, "dur": 5.152, + "args": { + "External id": 471864,"Record function id": 0, "Ev Idx": 10039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333344.096, "dur": 3.177, + "args": { + "External id": 471865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333344.670, "dur": 1.643, + "args": { + "External id": 471866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333345.166, "dur": 1.033, + "args": { + "External id": 471867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333351.636, "dur": 4.013, + "args": { + "External id": 471868,"Record function id": 0, "Ev Idx": 10043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333352.848, "dur": 2.295, + "args": { + "External id": 471869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333353.460, "dur": 1.186, + "args": { + "External id": 471870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333353.793, "dur": 0.780, + "args": { + "External id": 471871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333359.328, "dur": 4.067, + "args": { + "External id": 471872,"Record function id": 0, "Ev Idx": 10047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333360.452, "dur": 2.475, + "args": { + "External id": 471873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333360.993, "dur": 1.400, + "args": { + "External id": 471874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333361.506, "dur": 0.778, + "args": { + "External id": 471875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333367.013, "dur": 4.948, + "args": { + "External id": 471876,"Record function id": 0, "Ev Idx": 10051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455333368.327, "dur": 3.146, + "args": { + "External id": 471877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333369.180, "dur": 1.782, + "args": { + "External id": 471878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455333370.013, "dur": 0.817, + "args": { + "External id": 471879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455333377.399, "dur": 61085.193, + "args": { + "External id": 471880,"Record function id": 0, "Sequence number": 5036616, "Fwd thread id": 1, "Ev Idx": 10055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455333379.126, "dur": 61072.902, + "args": { + "External id": 471881,"Sequence number": 5036616, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10056 + } + }, + { + "ph": "f", "id": 427, "pid": 2338707, "tid": 2379456, "ts": 6267455333379.126, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2338707, "tid": 2379456, + "ts": 6267455333414.825, "dur": 46.223, + "args": { + "External id": 471882,"Record function id": 0, "Ev Idx": 10057 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2338707, "tid": 2379456, + "ts": 6267455333473.473, "dur": 77.073, + "args": { + "External id": 471883,"Record function id": 0, "Ev Idx": 10058 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2338707, "tid": 2379456, + "ts": 6267455333557.687, "dur": 60885.087, + "args": { + "External id": 471884,"Record function id": 0, "Ev Idx": 10059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455333687.777, "dur": 8.972, + "args": { + "External id": 471885,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455333709.651, "dur": 5.681, + "args": { + "External id": 471886,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455333732.981, "dur": 59520.914, + "args": { + "External id": 471887,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455333753.216, "dur": 59485.446, + "args": { + "External id": 471888,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455333864.567, "dur": 18.066, + "args": { + "External id": 471889,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455333908.877, "dur": 59273.384, + "args": { + "External id": 471890,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455333912.186, "dur": 59268.853, + "args": { + "External id": 471891,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455333918.061, "dur": 10.964, + "args": { + "External id": 471892,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455333931.765, "dur": 59243.075, + "args": { + "External id": 471893,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455393378.737, "dur": 16.072, + "args": { + "External id": 471894,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455393382.984, "dur": 11.360, + "args": { + "External id": 471895,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455393430.545, "dur": 508.839, + "args": { + "External id": 471896,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455393466.761, "dur": 466.004, + "args": { + "External id": 471897,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10072, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455393481.405, "dur": 444.224, + "args": { + "External id": 471898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455393979.766, "dur": 2.913, + "args": { + "External id": 471899,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10074, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455394095.849, "dur": 8.695, + "args": { + "External id": 471900,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455394120.153, "dur": 47.888, + "args": { + "External id": 471901,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455394181.364, "dur": 2.065, + "args": { + "External id": 471902,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455394189.633, "dur": 16.496, + "args": { + "External id": 471903,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455394212.607, "dur": 1.153, + "args": { + "External id": 471904,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455394218.404, "dur": 13.127, + "args": { + "External id": 471905,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455394237.903, "dur": 1.180, + "args": { + "External id": 471906,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455394243.400, "dur": 12.414, + "args": { + "External id": 471907,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455394260.987, "dur": 1.402, + "args": { + "External id": 471908,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455394266.647, "dur": 11.762, + "args": { + "External id": 471909,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455394283.302, "dur": 1.507, + "args": { + "External id": 471910,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455394289.127, "dur": 11.819, + "args": { + "External id": 471911,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455394306.012, "dur": 1.372, + "args": { + "External id": 471912,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455394311.361, "dur": 11.081, + "args": { + "External id": 471913,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455394326.880, "dur": 3.473, + "args": { + "External id": 471914,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455394335.427, "dur": 12.074, + "args": { + "External id": 471915,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455394351.675, "dur": 1.200, + "args": { + "External id": 471916,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455394358.766, "dur": 11.457, + "args": { + "External id": 471917,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455394481.742, "dur": 3498.534, + "args": { + "External id": 471918,"Record function id": 0, "Ev Idx": 10093 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2338707, "tid": 2379456, + "ts": 6267455394506.238, "dur": 1281.988, + "args": { + "External id": 471919,"Record function id": 0, "Ev Idx": 10094 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338707, "tid": 2379456, + "ts": 6267455394523.568, "dur": 409.864, + "args": { + "External id": 471920,"Record function id": 0, "Ev Idx": 10095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455394637.568, "dur": 6.240, + "args": { + "External id": 471921,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455394648.581, "dur": 1.258, + "args": { + "External id": 471922,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455394653.382, "dur": 1.051, + "args": { + "External id": 471923,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455394656.270, "dur": 1.188, + "args": { + "External id": 471924,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455394659.523, "dur": 1.006, + "args": { + "External id": 471925,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455394662.122, "dur": 0.919, + "args": { + "External id": 471926,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455394664.695, "dur": 3.197, + "args": { + "External id": 471927,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455394670.003, "dur": 1.673, + "args": { + "External id": 471928,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455394675.295, "dur": 0.648, + "args": { + "External id": 471929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455394677.544, "dur": 0.912, + "args": { + "External id": 471930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455394699.733, "dur": 193.668, + "args": { + "External id": 471931,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455394719.838, "dur": 168.009, + "args": { + "External id": 471932,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455394750.329, "dur": 17.193, + "args": { + "External id": 471933,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455394771.682, "dur": 83.920, + "args": { + "External id": 471934,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455394775.485, "dur": 79.773, + "args": { + "External id": 471935,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455394780.558, "dur": 7.250, + "args": { + "External id": 471936,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455394792.150, "dur": 62.405, + "args": { + "External id": 471937,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10112 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2338707, "tid": 2379456, + "ts": 6267455395029.041, "dur": 750.269, + "args": { + "External id": 471938,"Record function id": 0, "Ev Idx": 10113 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338707, "tid": 2379456, + "ts": 6267455395050.549, "dur": 712.941, + "args": { + "External id": 471939,"Record function id": 0, "Ev Idx": 10114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455395164.035, "dur": 8.744, + "args": { + "External id": 471940,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455395191.055, "dur": 37.625, + "args": { + "External id": 471941,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395198.044, "dur": 2.391, + "args": { + "External id": 471942,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395202.728, "dur": 2.938, + "args": { + "External id": 471943,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395208.078, "dur": 0.652, + "args": { + "External id": 471944,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395210.972, "dur": 0.785, + "args": { + "External id": 471945,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395213.110, "dur": 0.605, + "args": { + "External id": 471946,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395215.928, "dur": 0.368, + "args": { + "External id": 471947,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395218.461, "dur": 0.399, + "args": { + "External id": 471948,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395220.189, "dur": 0.781, + "args": { + "External id": 471949,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395223.266, "dur": 0.780, + "args": { + "External id": 471950,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455395241.517, "dur": 54.638, + "args": { + "External id": 471951,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455395332.706, "dur": 128.664, + "args": { + "External id": 471952,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455395345.603, "dur": 5.740, + "args": { + "External id": 471953,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455395357.290, "dur": 12.326, + "args": { + "External id": 471954,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455395362.724, "dur": 6.433, + "args": { + "External id": 471955,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395366.704, "dur": 0.735, + "args": { + "External id": 471956,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455395377.560, "dur": 28.278, + "args": { + "External id": 471957,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395379.915, "dur": 0.767, + "args": { + "External id": 471958,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395382.380, "dur": 0.837, + "args": { + "External id": 471959,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395385.027, "dur": 0.968, + "args": { + "External id": 471960,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395387.850, "dur": 0.351, + "args": { + "External id": 471961,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395389.842, "dur": 0.778, + "args": { + "External id": 471962,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395392.157, "dur": 0.601, + "args": { + "External id": 471963,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395394.939, "dur": 2.880, + "args": { + "External id": 471964,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395399.187, "dur": 0.709, + "args": { + "External id": 471965,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455395402.038, "dur": 0.669, + "args": { + "External id": 471966,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455395418.119, "dur": 34.256, + "args": { + "External id": 471967,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455395514.554, "dur": 158.597, + "args": { + "External id": 471968,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455395542.629, "dur": 125.394, + "args": { + "External id": 471969,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10144, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455395555.286, "dur": 107.309, + "args": { + "External id": 471970,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455395696.376, "dur": 2.655, + "args": { + "External id": 471971,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10146, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455395796.715, "dur": 2158.909, + "args": { + "External id": 471972,"Sequence number": 5036615, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10147 + } + }, + { + "ph": "f", "id": 428, "pid": 2338707, "tid": 2379456, "ts": 6267455395796.715, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455395926.591, "dur": 172.661, + "args": { + "External id": 471973,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455396158.657, "dur": 50.167, + "args": { + "External id": 471974,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455396232.710, "dur": 70.358, + "args": { + "External id": 471975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455396315.166, "dur": 39.364, + "args": { + "External id": 471976,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455396362.303, "dur": 40.817, + "args": { + "External id": 471977,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455396412.915, "dur": 36.729, + "args": { + "External id": 471978,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455396457.000, "dur": 33.952, + "args": { + "External id": 471979,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455396521.773, "dur": 27.605, + "args": { + "External id": 471980,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455396569.226, "dur": 50.236, + "args": { + "External id": 471981,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455396649.993, "dur": 26.033, + "args": { + "External id": 471982,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455396692.432, "dur": 18.981, + "args": { + "External id": 471983,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455396725.346, "dur": 51.830, + "args": { + "External id": 471984,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455396782.165, "dur": 60.666, + "args": { + "External id": 471985,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455396893.555, "dur": 354.548, + "args": { + "External id": 471986,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455396999.618, "dur": 8.012, + "args": { + "External id": 471987,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455397010.205, "dur": 6.009, + "args": { + "External id": 471988,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455397017.883, "dur": 2.865, + "args": { + "External id": 471989,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455397022.467, "dur": 2.488, + "args": { + "External id": 471990,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455397117.970, "dur": 7.075, + "args": { + "External id": 471991,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455397120.616, "dur": 3.950, + "args": { + "External id": 471992,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455397127.043, "dur": 41.151, + "args": { + "External id": 471993,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455397134.001, "dur": 2.040, + "args": { + "External id": 471994,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455397170.301, "dur": 2.594, + "args": { + "External id": 471995,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455397171.872, "dur": 0.936, + "args": { + "External id": 471996,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455397174.213, "dur": 19.089, + "args": { + "External id": 471997,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455397177.109, "dur": 0.766, + "args": { + "External id": 471998,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455397290.910, "dur": 34.563, + "args": { + "External id": 471999,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455397345.489, "dur": 21.925, + "args": { + "External id": 472000,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455397377.790, "dur": 60.677, + "args": { + "External id": 472001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455397448.124, "dur": 48.062, + "args": { + "External id": 472002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455397508.850, "dur": 25.501, + "args": { + "External id": 472003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455397542.010, "dur": 36.805, + "args": { + "External id": 472004,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455397588.010, "dur": 64.351, + "args": { + "External id": 472005,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455397665.649, "dur": 41.124, + "args": { + "External id": 472006,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455397730.419, "dur": 30.170, + "args": { + "External id": 472007,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455397780.504, "dur": 28.126, + "args": { + "External id": 472008,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455397823.356, "dur": 22.300, + "args": { + "External id": 472009,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455397862.751, "dur": 19.213, + "args": { + "External id": 472010,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455397896.970, "dur": 20.116, + "args": { + "External id": 472011,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398011.090, "dur": 18.490, + "args": { + "External id": 472012,"Record function id": 0, "Ev Idx": 10187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398014.858, "dur": 13.600, + "args": { + "External id": 472013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398020.068, "dur": 7.229, + "args": { + "External id": 472014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398022.471, "dur": 4.673, + "args": { + "External id": 472015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398034.387, "dur": 6.326, + "args": { + "External id": 472016,"Record function id": 0, "Ev Idx": 10191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398036.275, "dur": 3.850, + "args": { + "External id": 472017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398037.006, "dur": 2.477, + "args": { + "External id": 472018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398038.107, "dur": 1.284, + "args": { + "External id": 472019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398044.712, "dur": 5.498, + "args": { + "External id": 472020,"Record function id": 0, "Ev Idx": 10195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398046.218, "dur": 3.506, + "args": { + "External id": 472021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398047.001, "dur": 2.186, + "args": { + "External id": 472022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398047.862, "dur": 1.250, + "args": { + "External id": 472023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398054.261, "dur": 51.250, + "args": { + "External id": 472024,"Record function id": 0, "Ev Idx": 10199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398098.631, "dur": 5.726, + "args": { + "External id": 472025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398100.192, "dur": 3.086, + "args": { + "External id": 472026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398101.144, "dur": 1.832, + "args": { + "External id": 472027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398111.537, "dur": 9.321, + "args": { + "External id": 472028,"Record function id": 0, "Ev Idx": 10203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398113.630, "dur": 6.742, + "args": { + "External id": 472029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398114.378, "dur": 5.063, + "args": { + "External id": 472030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398115.678, "dur": 3.688, + "args": { + "External id": 472031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398124.627, "dur": 5.721, + "args": { + "External id": 472032,"Record function id": 0, "Ev Idx": 10207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398126.411, "dur": 3.440, + "args": { + "External id": 472033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398127.022, "dur": 2.238, + "args": { + "External id": 472034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398127.865, "dur": 1.280, + "args": { + "External id": 472035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398134.365, "dur": 4.845, + "args": { + "External id": 472036,"Record function id": 0, "Ev Idx": 10211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398136.011, "dur": 2.700, + "args": { + "External id": 472037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398136.673, "dur": 1.518, + "args": { + "External id": 472038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398137.382, "dur": 0.718, + "args": { + "External id": 472039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398143.084, "dur": 4.649, + "args": { + "External id": 472040,"Record function id": 0, "Ev Idx": 10215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398144.665, "dur": 2.580, + "args": { + "External id": 472041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398145.465, "dur": 1.220, + "args": { + "External id": 472042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398145.781, "dur": 0.797, + "args": { + "External id": 472043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398151.345, "dur": 4.760, + "args": { + "External id": 472044,"Record function id": 0, "Ev Idx": 10219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455398152.880, "dur": 2.701, + "args": { + "External id": 472045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398153.448, "dur": 1.634, + "args": { + "External id": 472046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455398154.161, "dur": 0.797, + "args": { + "External id": 472047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455398160.424, "dur": 66361.906, + "args": { + "External id": 472048,"Record function id": 0, "Sequence number": 5036614, "Fwd thread id": 1, "Ev Idx": 10223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455398161.785, "dur": 66349.719, + "args": { + "External id": 472049,"Sequence number": 5036614, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10224 + } + }, + { + "ph": "f", "id": 429, "pid": 2338707, "tid": 2379456, "ts": 6267455398161.785, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2338707, "tid": 2379456, + "ts": 6267455398198.642, "dur": 45.615, + "args": { + "External id": 472050,"Record function id": 0, "Ev Idx": 10225 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2338707, "tid": 2379456, + "ts": 6267455398285.236, "dur": 79.150, + "args": { + "External id": 472051,"Record function id": 0, "Ev Idx": 10226 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2338707, "tid": 2379456, + "ts": 6267455398371.959, "dur": 66130.856, + "args": { + "External id": 472052,"Record function id": 0, "Ev Idx": 10227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455398481.902, "dur": 9.677, + "args": { + "External id": 472053,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455398517.756, "dur": 7.657, + "args": { + "External id": 472054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455398546.219, "dur": 64760.224, + "args": { + "External id": 472055,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455398566.865, "dur": 64723.779, + "args": { + "External id": 472056,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455398701.296, "dur": 23.613, + "args": { + "External id": 472057,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455398751.898, "dur": 64483.740, + "args": { + "External id": 472058,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455398755.421, "dur": 64479.187, + "args": { + "External id": 472059,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455398761.108, "dur": 14.122, + "args": { + "External id": 472060,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455398777.631, "dur": 64451.005, + "args": { + "External id": 472061,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455463436.648, "dur": 17.883, + "args": { + "External id": 472062,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455463441.881, "dur": 12.149, + "args": { + "External id": 472063,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455463492.045, "dur": 520.625, + "args": { + "External id": 472064,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455463585.142, "dur": 420.947, + "args": { + "External id": 472065,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10240, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455463614.872, "dur": 384.634, + "args": { + "External id": 472066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455464040.332, "dur": 2.449, + "args": { + "External id": 472067,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10242, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455464164.486, "dur": 8.669, + "args": { + "External id": 472068,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455464188.540, "dur": 44.594, + "args": { + "External id": 472069,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455464245.908, "dur": 2.039, + "args": { + "External id": 472070,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455464254.448, "dur": 14.558, + "args": { + "External id": 472071,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455464276.026, "dur": 1.169, + "args": { + "External id": 472072,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455464281.855, "dur": 11.892, + "args": { + "External id": 472073,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455464300.244, "dur": 1.414, + "args": { + "External id": 472074,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455464306.252, "dur": 12.691, + "args": { + "External id": 472075,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455464324.088, "dur": 1.153, + "args": { + "External id": 472076,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455464329.603, "dur": 11.124, + "args": { + "External id": 472077,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455464346.257, "dur": 1.211, + "args": { + "External id": 472078,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455464351.932, "dur": 11.432, + "args": { + "External id": 472079,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455464367.956, "dur": 1.345, + "args": { + "External id": 472080,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455464373.780, "dur": 11.239, + "args": { + "External id": 472081,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455464392.747, "dur": 3.323, + "args": { + "External id": 472082,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455464400.470, "dur": 11.594, + "args": { + "External id": 472083,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455464417.199, "dur": 1.136, + "args": { + "External id": 472084,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455464422.299, "dur": 11.068, + "args": { + "External id": 472085,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455464541.128, "dur": 3571.530, + "args": { + "External id": 472086,"Record function id": 0, "Ev Idx": 10261 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2338707, "tid": 2379456, + "ts": 6267455464564.458, "dur": 1302.613, + "args": { + "External id": 472087,"Record function id": 0, "Ev Idx": 10262 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338707, "tid": 2379456, + "ts": 6267455464582.748, "dur": 418.208, + "args": { + "External id": 472088,"Record function id": 0, "Ev Idx": 10263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455464705.912, "dur": 5.843, + "args": { + "External id": 472089,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455464717.177, "dur": 1.288, + "args": { + "External id": 472090,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455464720.520, "dur": 1.207, + "args": { + "External id": 472091,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455464723.950, "dur": 1.295, + "args": { + "External id": 472092,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455464726.768, "dur": 1.046, + "args": { + "External id": 472093,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455464729.412, "dur": 1.372, + "args": { + "External id": 472094,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455464733.242, "dur": 3.085, + "args": { + "External id": 472095,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455464738.014, "dur": 1.986, + "args": { + "External id": 472096,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455464743.971, "dur": 4.600, + "args": { + "External id": 472097,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455464750.901, "dur": 0.990, + "args": { + "External id": 472098,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455464773.016, "dur": 193.491, + "args": { + "External id": 472099,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455464792.700, "dur": 167.727, + "args": { + "External id": 472100,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455464822.726, "dur": 17.805, + "args": { + "External id": 472101,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455464844.836, "dur": 85.613, + "args": { + "External id": 472102,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455464848.700, "dur": 81.283, + "args": { + "External id": 472103,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455464854.377, "dur": 7.131, + "args": { + "External id": 472104,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455464865.898, "dur": 63.203, + "args": { + "External id": 472105,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10280 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2338707, "tid": 2379456, + "ts": 6267455465141.326, "dur": 716.382, + "args": { + "External id": 472106,"Record function id": 0, "Ev Idx": 10281 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338707, "tid": 2379456, + "ts": 6267455465163.261, "dur": 679.383, + "args": { + "External id": 472107,"Record function id": 0, "Ev Idx": 10282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455465235.210, "dur": 6.989, + "args": { + "External id": 472108,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455465260.434, "dur": 34.832, + "args": { + "External id": 472109,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465266.741, "dur": 2.064, + "args": { + "External id": 472110,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465271.525, "dur": 3.051, + "args": { + "External id": 472111,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465276.764, "dur": 0.655, + "args": { + "External id": 472112,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465278.792, "dur": 0.696, + "args": { + "External id": 472113,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465281.593, "dur": 0.550, + "args": { + "External id": 472114,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465283.656, "dur": 0.777, + "args": { + "External id": 472115,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465285.787, "dur": 0.483, + "args": { + "External id": 472116,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465288.217, "dur": 0.643, + "args": { + "External id": 472117,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465290.319, "dur": 0.545, + "args": { + "External id": 472118,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455465317.377, "dur": 55.413, + "args": { + "External id": 472119,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455465409.873, "dur": 124.391, + "args": { + "External id": 472120,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455465422.329, "dur": 6.852, + "args": { + "External id": 472121,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455465435.390, "dur": 12.212, + "args": { + "External id": 472122,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455465440.703, "dur": 6.424, + "args": { + "External id": 472123,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465444.630, "dur": 0.721, + "args": { + "External id": 472124,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455465455.476, "dur": 26.114, + "args": { + "External id": 472125,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465457.218, "dur": 0.752, + "args": { + "External id": 472126,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465459.746, "dur": 0.463, + "args": { + "External id": 472127,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465461.805, "dur": 0.652, + "args": { + "External id": 472128,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465464.430, "dur": 0.454, + "args": { + "External id": 472129,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465466.830, "dur": 0.746, + "args": { + "External id": 472130,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465469.067, "dur": 0.453, + "args": { + "External id": 472131,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465471.569, "dur": 2.770, + "args": { + "External id": 472132,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465476.092, "dur": 0.484, + "args": { + "External id": 472133,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455465477.901, "dur": 0.412, + "args": { + "External id": 472134,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455465493.282, "dur": 32.160, + "args": { + "External id": 472135,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455465587.006, "dur": 170.669, + "args": { + "External id": 472136,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455465641.053, "dur": 111.998, + "args": { + "External id": 472137,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10312, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455465653.661, "dur": 94.483, + "args": { + "External id": 472138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455465778.431, "dur": 2.388, + "args": { + "External id": 472139,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10314, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455465875.588, "dur": 2174.743, + "args": { + "External id": 472140,"Sequence number": 5036613, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10315 + } + }, + { + "ph": "f", "id": 430, "pid": 2338707, "tid": 2379456, "ts": 6267455465875.588, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455466007.832, "dur": 185.966, + "args": { + "External id": 472141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455466248.692, "dur": 47.564, + "args": { + "External id": 472142,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455466319.889, "dur": 72.120, + "args": { + "External id": 472143,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455466404.277, "dur": 38.317, + "args": { + "External id": 472144,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455466450.070, "dur": 38.557, + "args": { + "External id": 472145,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455466498.174, "dur": 32.833, + "args": { + "External id": 472146,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455466538.976, "dur": 33.282, + "args": { + "External id": 472147,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455466620.431, "dur": 30.639, + "args": { + "External id": 472148,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455466674.707, "dur": 35.131, + "args": { + "External id": 472149,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455466736.384, "dur": 25.029, + "args": { + "External id": 472150,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455466775.890, "dur": 18.501, + "args": { + "External id": 472151,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455466807.248, "dur": 51.882, + "args": { + "External id": 472152,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455466864.927, "dur": 40.095, + "args": { + "External id": 472153,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455466953.624, "dur": 373.653, + "args": { + "External id": 472154,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455467101.571, "dur": 11.091, + "args": { + "External id": 472155,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455467116.469, "dur": 5.826, + "args": { + "External id": 472156,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455467123.841, "dur": 3.050, + "args": { + "External id": 472157,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455467128.237, "dur": 2.546, + "args": { + "External id": 472158,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455467195.919, "dur": 6.314, + "args": { + "External id": 472159,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455467198.284, "dur": 3.724, + "args": { + "External id": 472160,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455467204.208, "dur": 40.365, + "args": { + "External id": 472161,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455467211.267, "dur": 2.324, + "args": { + "External id": 472162,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455467249.293, "dur": 1.889, + "args": { + "External id": 472163,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455467250.275, "dur": 0.812, + "args": { + "External id": 472164,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455467252.435, "dur": 18.759, + "args": { + "External id": 472165,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455467254.734, "dur": 0.739, + "args": { + "External id": 472166,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455467373.068, "dur": 34.441, + "args": { + "External id": 472167,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455467430.014, "dur": 20.738, + "args": { + "External id": 472168,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455467461.386, "dur": 66.827, + "args": { + "External id": 472169,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455467537.502, "dur": 49.640, + "args": { + "External id": 472170,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455467621.922, "dur": 33.163, + "args": { + "External id": 472171,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455467664.078, "dur": 38.754, + "args": { + "External id": 472172,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455467712.357, "dur": 37.474, + "args": { + "External id": 472173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455467758.917, "dur": 40.027, + "args": { + "External id": 472174,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455467821.006, "dur": 29.985, + "args": { + "External id": 472175,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455467868.627, "dur": 29.663, + "args": { + "External id": 472176,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455467914.769, "dur": 22.173, + "args": { + "External id": 472177,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455467952.621, "dur": 19.121, + "args": { + "External id": 472178,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455467993.381, "dur": 20.042, + "args": { + "External id": 472179,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468142.022, "dur": 18.685, + "args": { + "External id": 472180,"Record function id": 0, "Ev Idx": 10355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468145.911, "dur": 13.501, + "args": { + "External id": 472181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468150.902, "dur": 7.296, + "args": { + "External id": 472182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468153.003, "dur": 5.035, + "args": { + "External id": 472183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468165.225, "dur": 6.746, + "args": { + "External id": 472184,"Record function id": 0, "Ev Idx": 10359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468166.960, "dur": 4.523, + "args": { + "External id": 472185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468167.991, "dur": 2.789, + "args": { + "External id": 472186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468169.236, "dur": 1.451, + "args": { + "External id": 472187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468175.889, "dur": 5.124, + "args": { + "External id": 472188,"Record function id": 0, "Ev Idx": 10363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468177.277, "dur": 3.262, + "args": { + "External id": 472189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468178.027, "dur": 1.970, + "args": { + "External id": 472190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468178.631, "dur": 1.289, + "args": { + "External id": 472191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468184.815, "dur": 7.052, + "args": { + "External id": 472192,"Record function id": 0, "Ev Idx": 10367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468185.875, "dur": 5.494, + "args": { + "External id": 472193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468186.803, "dur": 4.086, + "args": { + "External id": 472194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468187.431, "dur": 3.376, + "args": { + "External id": 472195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468195.650, "dur": 4.947, + "args": { + "External id": 472196,"Record function id": 0, "Ev Idx": 10371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468197.071, "dur": 3.026, + "args": { + "External id": 472197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468197.798, "dur": 1.605, + "args": { + "External id": 472198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468198.112, "dur": 1.069, + "args": { + "External id": 472199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468204.402, "dur": 5.614, + "args": { + "External id": 472200,"Record function id": 0, "Ev Idx": 10375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468206.230, "dur": 3.271, + "args": { + "External id": 472201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468207.396, "dur": 1.594, + "args": { + "External id": 472202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468208.077, "dur": 0.794, + "args": { + "External id": 472203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468213.819, "dur": 4.833, + "args": { + "External id": 472204,"Record function id": 0, "Ev Idx": 10379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468215.162, "dur": 3.001, + "args": { + "External id": 472205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468216.223, "dur": 1.423, + "args": { + "External id": 472206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468216.756, "dur": 0.798, + "args": { + "External id": 472207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468222.284, "dur": 4.647, + "args": { + "External id": 472208,"Record function id": 0, "Ev Idx": 10383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468223.626, "dur": 2.851, + "args": { + "External id": 472209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468224.431, "dur": 1.563, + "args": { + "External id": 472210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468224.917, "dur": 0.963, + "args": { + "External id": 472211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468231.302, "dur": 4.865, + "args": { + "External id": 472212,"Record function id": 0, "Ev Idx": 10387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455468232.905, "dur": 2.661, + "args": { + "External id": 472213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468233.460, "dur": 1.631, + "args": { + "External id": 472214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455468234.077, "dur": 0.880, + "args": { + "External id": 472215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455468241.051, "dur": 63949.471, + "args": { + "External id": 472216,"Record function id": 0, "Sequence number": 5036612, "Fwd thread id": 1, "Ev Idx": 10391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455468242.838, "dur": 63935.889, + "args": { + "External id": 472217,"Sequence number": 5036612, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10392 + } + }, + { + "ph": "f", "id": 431, "pid": 2338707, "tid": 2379456, "ts": 6267455468242.838, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2338707, "tid": 2379456, + "ts": 6267455468280.875, "dur": 47.149, + "args": { + "External id": 472218,"Record function id": 0, "Ev Idx": 10393 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2338707, "tid": 2379456, + "ts": 6267455468337.792, "dur": 73.535, + "args": { + "External id": 472219,"Record function id": 0, "Ev Idx": 10394 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2338707, "tid": 2379456, + "ts": 6267455468418.563, "dur": 63749.312, + "args": { + "External id": 472220,"Record function id": 0, "Ev Idx": 10395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455468526.187, "dur": 8.327, + "args": { + "External id": 472221,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455468546.170, "dur": 5.696, + "args": { + "External id": 472222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455468569.317, "dur": 62332.671, + "args": { + "External id": 472223,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455468585.387, "dur": 62299.533, + "args": { + "External id": 472224,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455468709.107, "dur": 21.678, + "args": { + "External id": 472225,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455468771.033, "dur": 62057.157, + "args": { + "External id": 472226,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455468774.817, "dur": 62050.993, + "args": { + "External id": 472227,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455468781.949, "dur": 18.368, + "args": { + "External id": 472228,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455468806.719, "dur": 62016.254, + "args": { + "External id": 472229,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455531048.367, "dur": 65.467, + "args": { + "External id": 472230,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455531101.404, "dur": 11.573, + "args": { + "External id": 472231,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455531160.587, "dur": 490.235, + "args": { + "External id": 472232,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455531204.746, "dur": 439.013, + "args": { + "External id": 472233,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10408, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455531221.103, "dur": 414.343, + "args": { + "External id": 472234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455531681.051, "dur": 3.200, + "args": { + "External id": 472235,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10410, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455531761.006, "dur": 8.120, + "args": { + "External id": 472236,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455531783.237, "dur": 40.841, + "args": { + "External id": 472237,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455531836.518, "dur": 1.714, + "args": { + "External id": 472238,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455531844.586, "dur": 14.861, + "args": { + "External id": 472239,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455531869.463, "dur": 1.223, + "args": { + "External id": 472240,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455531875.499, "dur": 14.153, + "args": { + "External id": 472241,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455531895.939, "dur": 1.322, + "args": { + "External id": 472242,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455531901.260, "dur": 13.894, + "args": { + "External id": 472243,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455531920.069, "dur": 4.398, + "args": { + "External id": 472244,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455531929.480, "dur": 13.567, + "args": { + "External id": 472245,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455531948.109, "dur": 1.373, + "args": { + "External id": 472246,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455531953.562, "dur": 13.376, + "args": { + "External id": 472247,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455531971.547, "dur": 3.706, + "args": { + "External id": 472248,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455531979.616, "dur": 13.253, + "args": { + "External id": 472249,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455532000.547, "dur": 1.019, + "args": { + "External id": 472250,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455532006.095, "dur": 13.728, + "args": { + "External id": 472251,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455532024.013, "dur": 0.958, + "args": { + "External id": 472252,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455532029.073, "dur": 12.616, + "args": { + "External id": 472253,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455532210.707, "dur": 3508.476, + "args": { + "External id": 472254,"Record function id": 0, "Ev Idx": 10429 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2338707, "tid": 2379456, + "ts": 6267455532235.041, "dur": 1253.954, + "args": { + "External id": 472255,"Record function id": 0, "Ev Idx": 10430 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338707, "tid": 2379456, + "ts": 6267455532255.239, "dur": 413.220, + "args": { + "External id": 472256,"Record function id": 0, "Ev Idx": 10431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455532351.054, "dur": 5.936, + "args": { + "External id": 472257,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455532361.146, "dur": 1.585, + "args": { + "External id": 472258,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455532364.548, "dur": 1.363, + "args": { + "External id": 472259,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455532367.900, "dur": 0.934, + "args": { + "External id": 472260,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455532370.869, "dur": 0.872, + "args": { + "External id": 472261,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455532373.966, "dur": 3.745, + "args": { + "External id": 472262,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455532381.289, "dur": 0.948, + "args": { + "External id": 472263,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455532383.608, "dur": 1.839, + "args": { + "External id": 472264,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455532387.152, "dur": 0.919, + "args": { + "External id": 472265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455532389.590, "dur": 0.765, + "args": { + "External id": 472266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455532412.712, "dur": 214.852, + "args": { + "External id": 472267,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455532432.332, "dur": 187.825, + "args": { + "External id": 472268,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455532462.951, "dur": 18.078, + "args": { + "External id": 472269,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455532485.613, "dur": 80.722, + "args": { + "External id": 472270,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455532488.787, "dur": 77.100, + "args": { + "External id": 472271,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455532494.148, "dur": 6.346, + "args": { + "External id": 472272,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455532502.655, "dur": 62.444, + "args": { + "External id": 472273,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10448 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2338707, "tid": 2379456, + "ts": 6267455532763.662, "dur": 712.388, + "args": { + "External id": 472274,"Record function id": 0, "Ev Idx": 10449 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338707, "tid": 2379456, + "ts": 6267455532784.347, "dur": 676.584, + "args": { + "External id": 472275,"Record function id": 0, "Ev Idx": 10450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455532851.589, "dur": 6.727, + "args": { + "External id": 472276,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455532876.678, "dur": 35.021, + "args": { + "External id": 472277,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455532882.561, "dur": 4.535, + "args": { + "External id": 472278,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455532889.823, "dur": 0.716, + "args": { + "External id": 472279,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455532892.329, "dur": 0.461, + "args": { + "External id": 472280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455532894.233, "dur": 0.527, + "args": { + "External id": 472281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455532896.482, "dur": 0.526, + "args": { + "External id": 472282,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455532898.449, "dur": 0.442, + "args": { + "External id": 472283,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455532900.174, "dur": 0.501, + "args": { + "External id": 472284,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455532902.513, "dur": 0.473, + "args": { + "External id": 472285,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455532904.456, "dur": 2.911, + "args": { + "External id": 472286,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455532924.263, "dur": 47.408, + "args": { + "External id": 472287,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455533007.639, "dur": 177.874, + "args": { + "External id": 472288,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455533020.390, "dur": 3.977, + "args": { + "External id": 472289,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455533030.467, "dur": 11.856, + "args": { + "External id": 472290,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455533035.823, "dur": 6.034, + "args": { + "External id": 472291,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455533039.597, "dur": 0.750, + "args": { + "External id": 472292,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455533050.444, "dur": 69.689, + "args": { + "External id": 472293,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455533052.571, "dur": 0.829, + "args": { + "External id": 472294,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455533095.586, "dur": 1.041, + "args": { + "External id": 472295,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455533099.689, "dur": 0.809, + "args": { + "External id": 472296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455533101.766, "dur": 0.655, + "args": { + "External id": 472297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455533104.370, "dur": 0.687, + "args": { + "External id": 472298,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455533106.659, "dur": 2.632, + "args": { + "External id": 472299,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455533110.461, "dur": 0.976, + "args": { + "External id": 472300,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455533113.164, "dur": 0.522, + "args": { + "External id": 472301,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455533115.389, "dur": 0.764, + "args": { + "External id": 472302,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455533134.345, "dur": 39.438, + "args": { + "External id": 472303,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455533243.886, "dur": 134.708, + "args": { + "External id": 472304,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455533270.659, "dur": 104.135, + "args": { + "External id": 472305,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10480, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455533284.312, "dur": 85.424, + "args": { + "External id": 472306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455533398.912, "dur": 2.183, + "args": { + "External id": 472307,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10482, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455533498.553, "dur": 2194.296, + "args": { + "External id": 472308,"Sequence number": 5036611, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10483 + } + }, + { + "ph": "f", "id": 432, "pid": 2338707, "tid": 2379456, "ts": 6267455533498.553, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455533661.547, "dur": 132.237, + "args": { + "External id": 472309,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455533845.595, "dur": 48.131, + "args": { + "External id": 472310,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455533914.982, "dur": 61.868, + "args": { + "External id": 472311,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455533988.172, "dur": 36.869, + "args": { + "External id": 472312,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455534032.837, "dur": 83.635, + "args": { + "External id": 472313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455534131.801, "dur": 43.912, + "args": { + "External id": 472314,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455534184.504, "dur": 34.640, + "args": { + "External id": 472315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455534251.581, "dur": 29.351, + "args": { + "External id": 472316,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455534304.252, "dur": 32.659, + "args": { + "External id": 472317,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455534361.927, "dur": 23.628, + "args": { + "External id": 472318,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455534405.549, "dur": 16.858, + "args": { + "External id": 472319,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455534437.202, "dur": 45.500, + "args": { + "External id": 472320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455534487.746, "dur": 38.475, + "args": { + "External id": 472321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455534561.322, "dur": 362.246, + "args": { + "External id": 472322,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455534705.258, "dur": 13.582, + "args": { + "External id": 472323,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455534722.218, "dur": 3.574, + "args": { + "External id": 472324,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455534727.408, "dur": 2.506, + "args": { + "External id": 472325,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455534731.274, "dur": 2.194, + "args": { + "External id": 472326,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455534796.035, "dur": 6.262, + "args": { + "External id": 472327,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455534798.465, "dur": 3.608, + "args": { + "External id": 472328,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455534804.389, "dur": 43.933, + "args": { + "External id": 472329,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455534811.554, "dur": 2.255, + "args": { + "External id": 472330,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455534849.855, "dur": 2.667, + "args": { + "External id": 472331,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455534851.566, "dur": 0.865, + "args": { + "External id": 472332,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455534853.868, "dur": 18.874, + "args": { + "External id": 472333,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455534856.502, "dur": 0.656, + "args": { + "External id": 472334,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455534965.622, "dur": 44.520, + "args": { + "External id": 472335,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455535031.669, "dur": 63.302, + "args": { + "External id": 472336,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455535110.330, "dur": 69.184, + "args": { + "External id": 472337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455535191.614, "dur": 55.127, + "args": { + "External id": 472338,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455535258.527, "dur": 26.195, + "args": { + "External id": 472339,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455535292.273, "dur": 41.580, + "args": { + "External id": 472340,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455535343.023, "dur": 33.773, + "args": { + "External id": 472341,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455535385.291, "dur": 36.642, + "args": { + "External id": 472342,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455535447.443, "dur": 29.788, + "args": { + "External id": 472343,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455535496.558, "dur": 28.335, + "args": { + "External id": 472344,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455535541.393, "dur": 20.677, + "args": { + "External id": 472345,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455535579.357, "dur": 37.988, + "args": { + "External id": 472346,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455535636.225, "dur": 20.942, + "args": { + "External id": 472347,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535746.474, "dur": 18.537, + "args": { + "External id": 472348,"Record function id": 0, "Ev Idx": 10523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535750.144, "dur": 13.560, + "args": { + "External id": 472349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535755.167, "dur": 7.297, + "args": { + "External id": 472350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535757.511, "dur": 4.800, + "args": { + "External id": 472351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535769.622, "dur": 8.255, + "args": { + "External id": 472352,"Record function id": 0, "Ev Idx": 10527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535770.872, "dur": 6.491, + "args": { + "External id": 472353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535771.791, "dur": 4.875, + "args": { + "External id": 472354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535772.757, "dur": 3.802, + "args": { + "External id": 472355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535781.693, "dur": 5.465, + "args": { + "External id": 472356,"Record function id": 0, "Ev Idx": 10531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535783.136, "dur": 3.546, + "args": { + "External id": 472357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535783.943, "dur": 2.078, + "args": { + "External id": 472358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535784.572, "dur": 1.367, + "args": { + "External id": 472359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535791.160, "dur": 4.871, + "args": { + "External id": 472360,"Record function id": 0, "Ev Idx": 10535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535792.625, "dur": 2.933, + "args": { + "External id": 472361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535793.324, "dur": 1.725, + "args": { + "External id": 472362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535794.199, "dur": 0.772, + "args": { + "External id": 472363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535799.764, "dur": 4.567, + "args": { + "External id": 472364,"Record function id": 0, "Ev Idx": 10539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535801.009, "dur": 2.830, + "args": { + "External id": 472365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535801.674, "dur": 1.463, + "args": { + "External id": 472366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535802.260, "dur": 0.798, + "args": { + "External id": 472367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535808.055, "dur": 4.676, + "args": { + "External id": 472368,"Record function id": 0, "Ev Idx": 10543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535809.511, "dur": 2.730, + "args": { + "External id": 472369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535810.069, "dur": 1.442, + "args": { + "External id": 472370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535810.586, "dur": 0.800, + "args": { + "External id": 472371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535816.978, "dur": 4.026, + "args": { + "External id": 472372,"Record function id": 0, "Ev Idx": 10547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535818.268, "dur": 2.254, + "args": { + "External id": 472373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535818.905, "dur": 1.079, + "args": { + "External id": 472374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535819.252, "dur": 0.656, + "args": { + "External id": 472375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535824.698, "dur": 4.083, + "args": { + "External id": 472376,"Record function id": 0, "Ev Idx": 10551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535825.936, "dur": 2.362, + "args": { + "External id": 472377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535826.450, "dur": 1.320, + "args": { + "External id": 472378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535826.786, "dur": 0.882, + "args": { + "External id": 472379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535832.758, "dur": 10.393, + "args": { + "External id": 472380,"Record function id": 0, "Ev Idx": 10555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455535837.737, "dur": 4.855, + "args": { + "External id": 472381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535838.320, "dur": 3.795, + "args": { + "External id": 472382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455535838.832, "dur": 3.123, + "args": { + "External id": 472383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455535847.783, "dur": 66312.711, + "args": { + "External id": 472384,"Record function id": 0, "Sequence number": 5036610, "Fwd thread id": 1, "Ev Idx": 10559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455535849.282, "dur": 66298.351, + "args": { + "External id": 472385,"Sequence number": 5036610, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10560 + } + }, + { + "ph": "f", "id": 433, "pid": 2338707, "tid": 2379456, "ts": 6267455535849.282, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2338707, "tid": 2379456, + "ts": 6267455535884.609, "dur": 49.319, + "args": { + "External id": 472386,"Record function id": 0, "Ev Idx": 10561 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2338707, "tid": 2379456, + "ts": 6267455535943.822, "dur": 75.562, + "args": { + "External id": 472387,"Record function id": 0, "Ev Idx": 10562 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2338707, "tid": 2379456, + "ts": 6267455536026.969, "dur": 66106.743, + "args": { + "External id": 472388,"Record function id": 0, "Ev Idx": 10563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455536176.579, "dur": 8.580, + "args": { + "External id": 472389,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455536197.957, "dur": 5.985, + "args": { + "External id": 472390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455536221.594, "dur": 64630.506, + "args": { + "External id": 472391,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455536237.906, "dur": 64596.817, + "args": { + "External id": 472392,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455536349.052, "dur": 32.488, + "args": { + "External id": 472393,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455536412.487, "dur": 64365.376, + "args": { + "External id": 472394,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455536415.778, "dur": 64360.024, + "args": { + "External id": 472395,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455536425.886, "dur": 12.841, + "args": { + "External id": 472396,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455536440.917, "dur": 64332.337, + "args": { + "External id": 472397,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455600985.252, "dur": 15.634, + "args": { + "External id": 472398,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455600990.652, "dur": 9.685, + "args": { + "External id": 472399,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455601038.965, "dur": 587.793, + "args": { + "External id": 472400,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455601134.911, "dur": 483.367, + "args": { + "External id": 472401,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10576, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455601154.424, "dur": 436.628, + "args": { + "External id": 472402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455601658.105, "dur": 3.354, + "args": { + "External id": 472403,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10578, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455601742.732, "dur": 7.896, + "args": { + "External id": 472404,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455601765.459, "dur": 46.952, + "args": { + "External id": 472405,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455601824.962, "dur": 1.850, + "args": { + "External id": 472406,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455601833.142, "dur": 16.250, + "args": { + "External id": 472407,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455601855.972, "dur": 1.034, + "args": { + "External id": 472408,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455601862.740, "dur": 13.723, + "args": { + "External id": 472409,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455601881.940, "dur": 3.046, + "args": { + "External id": 472410,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455601889.727, "dur": 14.593, + "args": { + "External id": 472411,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455601908.898, "dur": 0.946, + "args": { + "External id": 472412,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455601914.648, "dur": 13.006, + "args": { + "External id": 472413,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455601932.788, "dur": 1.084, + "args": { + "External id": 472414,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455601939.146, "dur": 13.714, + "args": { + "External id": 472415,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455601960.299, "dur": 0.765, + "args": { + "External id": 472416,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455601966.694, "dur": 12.233, + "args": { + "External id": 472417,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455601983.636, "dur": 1.112, + "args": { + "External id": 472418,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455601989.529, "dur": 13.884, + "args": { + "External id": 472419,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455602007.554, "dur": 1.009, + "args": { + "External id": 472420,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455602012.796, "dur": 12.682, + "args": { + "External id": 472421,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455602181.258, "dur": 3512.719, + "args": { + "External id": 472422,"Record function id": 0, "Ev Idx": 10597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2338707, "tid": 2379456, + "ts": 6267455602212.290, "dur": 1284.824, + "args": { + "External id": 472423,"Record function id": 0, "Ev Idx": 10598 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338707, "tid": 2379456, + "ts": 6267455602232.053, "dur": 428.509, + "args": { + "External id": 472424,"Record function id": 0, "Ev Idx": 10599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455602331.409, "dur": 5.998, + "args": { + "External id": 472425,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455602341.648, "dur": 1.199, + "args": { + "External id": 472426,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455602345.527, "dur": 3.771, + "args": { + "External id": 472427,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455602351.555, "dur": 1.079, + "args": { + "External id": 472428,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455602354.871, "dur": 0.758, + "args": { + "External id": 472429,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455602357.220, "dur": 0.914, + "args": { + "External id": 472430,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455602359.932, "dur": 0.959, + "args": { + "External id": 472431,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455602362.575, "dur": 1.715, + "args": { + "External id": 472432,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455602368.410, "dur": 0.782, + "args": { + "External id": 472433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455602370.881, "dur": 0.802, + "args": { + "External id": 472434,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455602395.778, "dur": 224.508, + "args": { + "External id": 472435,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455602415.066, "dur": 196.623, + "args": { + "External id": 472436,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455602449.114, "dur": 21.758, + "args": { + "External id": 472437,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455602476.181, "dur": 85.478, + "args": { + "External id": 472438,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455602479.807, "dur": 81.331, + "args": { + "External id": 472439,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455602484.897, "dur": 7.142, + "args": { + "External id": 472440,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455602493.981, "dur": 66.087, + "args": { + "External id": 472441,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2338707, "tid": 2379456, + "ts": 6267455602766.682, "dur": 721.049, + "args": { + "External id": 472442,"Record function id": 0, "Ev Idx": 10617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338707, "tid": 2379456, + "ts": 6267455602787.272, "dur": 685.425, + "args": { + "External id": 472443,"Record function id": 0, "Ev Idx": 10618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455602858.989, "dur": 8.207, + "args": { + "External id": 472444,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455602884.818, "dur": 33.148, + "args": { + "External id": 472445,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455602891.336, "dur": 1.830, + "args": { + "External id": 472446,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455602895.172, "dur": 0.622, + "args": { + "External id": 472447,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455602898.106, "dur": 0.740, + "args": { + "External id": 472448,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455602900.441, "dur": 0.460, + "args": { + "External id": 472449,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455602902.154, "dur": 0.883, + "args": { + "External id": 472450,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455602904.942, "dur": 2.523, + "args": { + "External id": 472451,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455602909.224, "dur": 0.340, + "args": { + "External id": 472452,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455602910.885, "dur": 0.672, + "args": { + "External id": 472453,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455602913.362, "dur": 0.608, + "args": { + "External id": 472454,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455602930.130, "dur": 50.613, + "args": { + "External id": 472455,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455603015.855, "dur": 172.812, + "args": { + "External id": 472456,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455603027.864, "dur": 3.887, + "args": { + "External id": 472457,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455603037.879, "dur": 12.049, + "args": { + "External id": 472458,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455603043.026, "dur": 6.414, + "args": { + "External id": 472459,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455603047.191, "dur": 0.747, + "args": { + "External id": 472460,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455603100.799, "dur": 28.649, + "args": { + "External id": 472461,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455603104.071, "dur": 0.871, + "args": { + "External id": 472462,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455603106.907, "dur": 0.686, + "args": { + "External id": 472463,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455603110.016, "dur": 3.381, + "args": { + "External id": 472464,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455603115.178, "dur": 0.604, + "args": { + "External id": 472465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455603117.213, "dur": 0.710, + "args": { + "External id": 472466,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455603119.727, "dur": 0.419, + "args": { + "External id": 472467,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455603121.774, "dur": 0.707, + "args": { + "External id": 472468,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455603123.674, "dur": 0.470, + "args": { + "External id": 472469,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455603125.708, "dur": 0.476, + "args": { + "External id": 472470,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455603143.210, "dur": 36.722, + "args": { + "External id": 472471,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455603244.072, "dur": 147.592, + "args": { + "External id": 472472,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455603284.534, "dur": 102.571, + "args": { + "External id": 472473,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10648, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455603296.095, "dur": 86.037, + "args": { + "External id": 472474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455603409.571, "dur": 2.119, + "args": { + "External id": 472475,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10650, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455603505.712, "dur": 2160.809, + "args": { + "External id": 472476,"Sequence number": 5036609, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10651 + } + }, + { + "ph": "f", "id": 434, "pid": 2338707, "tid": 2379456, "ts": 6267455603505.712, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455603658.099, "dur": 127.884, + "args": { + "External id": 472477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455603840.626, "dur": 50.751, + "args": { + "External id": 472478,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455603910.804, "dur": 59.359, + "args": { + "External id": 472479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455603984.510, "dur": 37.452, + "args": { + "External id": 472480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455604029.127, "dur": 79.557, + "args": { + "External id": 472481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455604122.559, "dur": 38.944, + "args": { + "External id": 472482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455604169.666, "dur": 35.163, + "args": { + "External id": 472483,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455604239.966, "dur": 29.011, + "args": { + "External id": 472484,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455604294.200, "dur": 34.886, + "args": { + "External id": 472485,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455604354.755, "dur": 23.383, + "args": { + "External id": 472486,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455604395.331, "dur": 17.507, + "args": { + "External id": 472487,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455604423.507, "dur": 46.635, + "args": { + "External id": 472488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455604478.214, "dur": 40.626, + "args": { + "External id": 472489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455604557.158, "dur": 355.447, + "args": { + "External id": 472490,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455604671.566, "dur": 8.345, + "args": { + "External id": 472491,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455604683.001, "dur": 3.182, + "args": { + "External id": 472492,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455604688.080, "dur": 2.500, + "args": { + "External id": 472493,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455604692.048, "dur": 3.004, + "args": { + "External id": 472494,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455604753.820, "dur": 6.448, + "args": { + "External id": 472495,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455604756.490, "dur": 3.536, + "args": { + "External id": 472496,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455604762.361, "dur": 55.874, + "args": { + "External id": 472497,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455604768.900, "dur": 4.040, + "args": { + "External id": 472498,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455604824.014, "dur": 2.844, + "args": { + "External id": 472499,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455604825.608, "dur": 1.140, + "args": { + "External id": 472500,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455604828.389, "dur": 23.101, + "args": { + "External id": 472501,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455604831.341, "dur": 0.980, + "args": { + "External id": 472502,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455604954.642, "dur": 38.671, + "args": { + "External id": 472503,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455605013.361, "dur": 23.338, + "args": { + "External id": 472504,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455605047.690, "dur": 103.880, + "args": { + "External id": 472505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455605164.017, "dur": 54.544, + "args": { + "External id": 472506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455605232.828, "dur": 28.034, + "args": { + "External id": 472507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455605268.389, "dur": 39.177, + "args": { + "External id": 472508,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455605316.736, "dur": 33.497, + "args": { + "External id": 472509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455605358.983, "dur": 37.395, + "args": { + "External id": 472510,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455605419.716, "dur": 30.670, + "args": { + "External id": 472511,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455605469.825, "dur": 29.429, + "args": { + "External id": 472512,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455605515.853, "dur": 22.538, + "args": { + "External id": 472513,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455605554.648, "dur": 19.084, + "args": { + "External id": 472514,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455605587.538, "dur": 37.926, + "args": { + "External id": 472515,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605721.484, "dur": 18.333, + "args": { + "External id": 472516,"Record function id": 0, "Ev Idx": 10691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605725.286, "dur": 13.062, + "args": { + "External id": 472517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605730.465, "dur": 6.758, + "args": { + "External id": 472518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605732.414, "dur": 4.666, + "args": { + "External id": 472519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605744.427, "dur": 6.662, + "args": { + "External id": 472520,"Record function id": 0, "Ev Idx": 10695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605746.357, "dur": 4.194, + "args": { + "External id": 472521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605747.426, "dur": 2.557, + "args": { + "External id": 472522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605748.651, "dur": 1.169, + "args": { + "External id": 472523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605755.181, "dur": 5.984, + "args": { + "External id": 472524,"Record function id": 0, "Ev Idx": 10699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605757.171, "dur": 3.504, + "args": { + "External id": 472525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605758.027, "dur": 2.151, + "args": { + "External id": 472526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605758.760, "dur": 1.336, + "args": { + "External id": 472527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605765.108, "dur": 4.793, + "args": { + "External id": 472528,"Record function id": 0, "Ev Idx": 10703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605766.494, "dur": 2.890, + "args": { + "External id": 472529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605767.433, "dur": 1.444, + "args": { + "External id": 472530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605767.880, "dur": 0.921, + "args": { + "External id": 472531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605773.523, "dur": 4.346, + "args": { + "External id": 472532,"Record function id": 0, "Ev Idx": 10707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605774.954, "dur": 2.445, + "args": { + "External id": 472533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605775.607, "dur": 1.259, + "args": { + "External id": 472534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605776.041, "dur": 0.750, + "args": { + "External id": 472535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605781.633, "dur": 7.302, + "args": { + "External id": 472536,"Record function id": 0, "Ev Idx": 10711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605782.854, "dur": 5.552, + "args": { + "External id": 472537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605783.544, "dur": 4.377, + "args": { + "External id": 472538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605784.430, "dur": 3.369, + "args": { + "External id": 472539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605792.838, "dur": 5.013, + "args": { + "External id": 472540,"Record function id": 0, "Ev Idx": 10715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605794.632, "dur": 2.719, + "args": { + "External id": 472541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605795.392, "dur": 1.466, + "args": { + "External id": 472542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605795.950, "dur": 0.818, + "args": { + "External id": 472543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605801.634, "dur": 4.432, + "args": { + "External id": 472544,"Record function id": 0, "Ev Idx": 10719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605802.958, "dur": 2.630, + "args": { + "External id": 472545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605803.953, "dur": 1.165, + "args": { + "External id": 472546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605804.292, "dur": 0.737, + "args": { + "External id": 472547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605810.177, "dur": 4.800, + "args": { + "External id": 472548,"Record function id": 0, "Ev Idx": 10723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455605811.365, "dur": 3.118, + "args": { + "External id": 472549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605812.159, "dur": 1.638, + "args": { + "External id": 472550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455605812.779, "dur": 0.885, + "args": { + "External id": 472551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455605820.179, "dur": 68315.432, + "args": { + "External id": 472552,"Record function id": 0, "Sequence number": 5036608, "Fwd thread id": 1, "Ev Idx": 10727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455605821.797, "dur": 68301.753, + "args": { + "External id": 472553,"Sequence number": 5036608, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10728 + } + }, + { + "ph": "f", "id": 435, "pid": 2338707, "tid": 2379456, "ts": 6267455605821.797, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2338707, "tid": 2379456, + "ts": 6267455605856.894, "dur": 47.130, + "args": { + "External id": 472554,"Record function id": 0, "Ev Idx": 10729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2338707, "tid": 2379456, + "ts": 6267455605913.892, "dur": 72.751, + "args": { + "External id": 472555,"Record function id": 0, "Ev Idx": 10730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2338707, "tid": 2379456, + "ts": 6267455605994.058, "dur": 68119.158, + "args": { + "External id": 472556,"Record function id": 0, "Ev Idx": 10731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455606145.312, "dur": 9.603, + "args": { + "External id": 472557,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455606168.567, "dur": 5.952, + "args": { + "External id": 472558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455606196.568, "dur": 66699.036, + "args": { + "External id": 472559,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455606213.774, "dur": 66665.101, + "args": { + "External id": 472560,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455606328.253, "dur": 21.632, + "args": { + "External id": 472561,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455606376.220, "dur": 66438.112, + "args": { + "External id": 472562,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455606379.692, "dur": 66433.317, + "args": { + "External id": 472563,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455606385.669, "dur": 12.035, + "args": { + "External id": 472564,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455606399.988, "dur": 66406.964, + "args": { + "External id": 472565,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455673037.949, "dur": 50.542, + "args": { + "External id": 472566,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455673043.279, "dur": 44.146, + "args": { + "External id": 472567,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455673128.096, "dur": 442.837, + "args": { + "External id": 472568,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455673169.683, "dur": 395.477, + "args": { + "External id": 472569,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10744, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455673184.626, "dur": 374.142, + "args": { + "External id": 472570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455673611.510, "dur": 3.774, + "args": { + "External id": 472571,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10746, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455673691.915, "dur": 10.233, + "args": { + "External id": 472572,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455673716.831, "dur": 45.117, + "args": { + "External id": 472573,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455673775.462, "dur": 1.882, + "args": { + "External id": 472574,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455673783.437, "dur": 16.320, + "args": { + "External id": 472575,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455673806.464, "dur": 1.119, + "args": { + "External id": 472576,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455673846.576, "dur": 14.214, + "args": { + "External id": 472577,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455673869.819, "dur": 1.014, + "args": { + "External id": 472578,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455673875.430, "dur": 14.121, + "args": { + "External id": 472579,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455673894.353, "dur": 0.799, + "args": { + "External id": 472580,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455673899.093, "dur": 12.747, + "args": { + "External id": 472581,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455673916.278, "dur": 0.906, + "args": { + "External id": 472582,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455673921.137, "dur": 13.528, + "args": { + "External id": 472583,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455673941.953, "dur": 0.769, + "args": { + "External id": 472584,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455673946.872, "dur": 12.834, + "args": { + "External id": 472585,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455673964.431, "dur": 0.734, + "args": { + "External id": 472586,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455673968.908, "dur": 13.392, + "args": { + "External id": 472587,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455673986.958, "dur": 3.230, + "args": { + "External id": 472588,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455673994.238, "dur": 12.113, + "args": { + "External id": 472589,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455674156.767, "dur": 3502.079, + "args": { + "External id": 472590,"Record function id": 0, "Ev Idx": 10765 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2338707, "tid": 2379456, + "ts": 6267455674183.339, "dur": 1254.260, + "args": { + "External id": 472591,"Record function id": 0, "Ev Idx": 10766 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338707, "tid": 2379456, + "ts": 6267455674201.528, "dur": 415.391, + "args": { + "External id": 472592,"Record function id": 0, "Ev Idx": 10767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455674302.597, "dur": 5.652, + "args": { + "External id": 472593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455674312.233, "dur": 1.154, + "args": { + "External id": 472594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455674315.813, "dur": 1.109, + "args": { + "External id": 472595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455674318.703, "dur": 0.986, + "args": { + "External id": 472596,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455674321.833, "dur": 0.840, + "args": { + "External id": 472597,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455674324.242, "dur": 0.723, + "args": { + "External id": 472598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455674326.665, "dur": 0.728, + "args": { + "External id": 472599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455674329.132, "dur": 4.331, + "args": { + "External id": 472600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455674335.363, "dur": 0.722, + "args": { + "External id": 472601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455674339.395, "dur": 0.729, + "args": { + "External id": 472602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455674360.865, "dur": 200.949, + "args": { + "External id": 472603,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455674379.972, "dur": 176.013, + "args": { + "External id": 472604,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455674408.845, "dur": 18.649, + "args": { + "External id": 472605,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455674431.832, "dur": 88.132, + "args": { + "External id": 472606,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455674436.789, "dur": 82.825, + "args": { + "External id": 472607,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455674441.549, "dur": 6.881, + "args": { + "External id": 472608,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455674450.468, "dur": 68.205, + "args": { + "External id": 472609,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10784 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2338707, "tid": 2379456, + "ts": 6267455674716.077, "dur": 712.632, + "args": { + "External id": 472610,"Record function id": 0, "Ev Idx": 10785 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338707, "tid": 2379456, + "ts": 6267455674737.928, "dur": 675.358, + "args": { + "External id": 472611,"Record function id": 0, "Ev Idx": 10786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455674809.544, "dur": 7.251, + "args": { + "External id": 472612,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455674835.161, "dur": 33.898, + "args": { + "External id": 472613,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455674841.394, "dur": 2.380, + "args": { + "External id": 472614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455674846.355, "dur": 0.865, + "args": { + "External id": 472615,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455674849.284, "dur": 2.617, + "args": { + "External id": 472616,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455674853.196, "dur": 0.458, + "args": { + "External id": 472617,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455674855.700, "dur": 0.454, + "args": { + "External id": 472618,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455674857.901, "dur": 0.640, + "args": { + "External id": 472619,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455674859.742, "dur": 0.626, + "args": { + "External id": 472620,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455674862.078, "dur": 0.379, + "args": { + "External id": 472621,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455674864.459, "dur": 0.410, + "args": { + "External id": 472622,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455674881.014, "dur": 50.724, + "args": { + "External id": 472623,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455674966.836, "dur": 167.913, + "args": { + "External id": 472624,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455674978.394, "dur": 3.634, + "args": { + "External id": 472625,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455674988.210, "dur": 14.207, + "args": { + "External id": 472626,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455674993.187, "dur": 8.731, + "args": { + "External id": 472627,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455674997.423, "dur": 3.048, + "args": { + "External id": 472628,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455675009.948, "dur": 25.353, + "args": { + "External id": 472629,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455675011.924, "dur": 0.748, + "args": { + "External id": 472630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455675014.534, "dur": 0.645, + "args": { + "External id": 472631,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455675016.747, "dur": 0.712, + "args": { + "External id": 472632,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455675019.459, "dur": 0.584, + "args": { + "External id": 472633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455675021.974, "dur": 0.394, + "args": { + "External id": 472634,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455675023.359, "dur": 0.536, + "args": { + "External id": 472635,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455675025.561, "dur": 0.437, + "args": { + "External id": 472636,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455675027.765, "dur": 2.630, + "args": { + "External id": 472637,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455675031.447, "dur": 0.774, + "args": { + "External id": 472638,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455675047.157, "dur": 76.003, + "args": { + "External id": 472639,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455675191.650, "dur": 136.960, + "args": { + "External id": 472640,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455675220.089, "dur": 104.387, + "args": { + "External id": 472641,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10816, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455675233.387, "dur": 85.973, + "args": { + "External id": 472642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455675348.575, "dur": 2.354, + "args": { + "External id": 472643,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10818, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455675445.989, "dur": 2185.998, + "args": { + "External id": 472644,"Sequence number": 5036607, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10819 + } + }, + { + "ph": "f", "id": 436, "pid": 2338707, "tid": 2379456, "ts": 6267455675445.989, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455675574.264, "dur": 157.954, + "args": { + "External id": 472645,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455675785.800, "dur": 47.493, + "args": { + "External id": 472646,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455675857.377, "dur": 61.082, + "args": { + "External id": 472647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455675930.034, "dur": 38.317, + "args": { + "External id": 472648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455675975.684, "dur": 41.024, + "args": { + "External id": 472649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455676024.759, "dur": 78.632, + "args": { + "External id": 472650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455676123.379, "dur": 41.265, + "args": { + "External id": 472651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455676199.985, "dur": 30.493, + "args": { + "External id": 472652,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455676253.165, "dur": 35.522, + "args": { + "External id": 472653,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455676314.950, "dur": 22.332, + "args": { + "External id": 472654,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455676353.907, "dur": 17.352, + "args": { + "External id": 472655,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455676384.019, "dur": 45.370, + "args": { + "External id": 472656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455676434.379, "dur": 38.960, + "args": { + "External id": 472657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455676508.159, "dur": 351.919, + "args": { + "External id": 472658,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455676620.809, "dur": 8.983, + "args": { + "External id": 472659,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455676632.751, "dur": 3.371, + "args": { + "External id": 472660,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455676637.671, "dur": 2.885, + "args": { + "External id": 472661,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455676642.042, "dur": 5.534, + "args": { + "External id": 472662,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455676698.979, "dur": 6.163, + "args": { + "External id": 472663,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455676701.461, "dur": 3.459, + "args": { + "External id": 472664,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455676707.300, "dur": 54.926, + "args": { + "External id": 472665,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455676713.949, "dur": 2.179, + "args": { + "External id": 472666,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455676767.613, "dur": 2.909, + "args": { + "External id": 472667,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455676769.347, "dur": 1.082, + "args": { + "External id": 472668,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455676772.215, "dur": 20.812, + "args": { + "External id": 472669,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455676775.740, "dur": 1.040, + "args": { + "External id": 472670,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455676903.084, "dur": 38.545, + "args": { + "External id": 472671,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455676977.383, "dur": 23.226, + "args": { + "External id": 472672,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455677011.381, "dur": 101.907, + "args": { + "External id": 472673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455677125.116, "dur": 57.266, + "args": { + "External id": 472674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455677197.801, "dur": 31.337, + "args": { + "External id": 472675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455677236.508, "dur": 38.217, + "args": { + "External id": 472676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455677286.822, "dur": 34.332, + "args": { + "External id": 472677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455677329.885, "dur": 36.670, + "args": { + "External id": 472678,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455677390.136, "dur": 30.432, + "args": { + "External id": 472679,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455677439.274, "dur": 29.451, + "args": { + "External id": 472680,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455677485.382, "dur": 21.884, + "args": { + "External id": 472681,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455677523.852, "dur": 19.063, + "args": { + "External id": 472682,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455677557.445, "dur": 18.822, + "args": { + "External id": 472683,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677686.672, "dur": 22.333, + "args": { + "External id": 472684,"Record function id": 0, "Ev Idx": 10859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677690.477, "dur": 17.153, + "args": { + "External id": 472685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677695.797, "dur": 10.480, + "args": { + "External id": 472686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677697.985, "dur": 8.140, + "args": { + "External id": 472687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677713.798, "dur": 5.735, + "args": { + "External id": 472688,"Record function id": 0, "Ev Idx": 10863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677715.198, "dur": 3.781, + "args": { + "External id": 472689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677716.099, "dur": 2.224, + "args": { + "External id": 472690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677716.976, "dur": 1.246, + "args": { + "External id": 472691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677723.469, "dur": 5.506, + "args": { + "External id": 472692,"Record function id": 0, "Ev Idx": 10867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677724.954, "dur": 3.515, + "args": { + "External id": 472693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677725.959, "dur": 1.922, + "args": { + "External id": 472694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677726.663, "dur": 1.138, + "args": { + "External id": 472695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677732.980, "dur": 4.238, + "args": { + "External id": 472696,"Record function id": 0, "Ev Idx": 10871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677734.197, "dur": 2.537, + "args": { + "External id": 472697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677734.785, "dur": 1.456, + "args": { + "External id": 472698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677735.184, "dur": 0.979, + "args": { + "External id": 472699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677740.984, "dur": 6.791, + "args": { + "External id": 472700,"Record function id": 0, "Ev Idx": 10875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677742.195, "dur": 5.095, + "args": { + "External id": 472701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677742.845, "dur": 3.607, + "args": { + "External id": 472702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677743.454, "dur": 2.926, + "args": { + "External id": 472703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677751.508, "dur": 4.512, + "args": { + "External id": 472704,"Record function id": 0, "Ev Idx": 10879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677752.913, "dur": 2.608, + "args": { + "External id": 472705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677753.519, "dur": 1.498, + "args": { + "External id": 472706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677754.165, "dur": 0.742, + "args": { + "External id": 472707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677762.885, "dur": 4.576, + "args": { + "External id": 472708,"Record function id": 0, "Ev Idx": 10883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677764.266, "dur": 2.691, + "args": { + "External id": 472709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677765.132, "dur": 1.334, + "args": { + "External id": 472710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677765.637, "dur": 0.736, + "args": { + "External id": 472711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677771.351, "dur": 4.999, + "args": { + "External id": 472712,"Record function id": 0, "Ev Idx": 10887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677772.754, "dur": 3.108, + "args": { + "External id": 472713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677773.611, "dur": 1.742, + "args": { + "External id": 472714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677774.305, "dur": 0.938, + "args": { + "External id": 472715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677780.045, "dur": 5.596, + "args": { + "External id": 472716,"Record function id": 0, "Ev Idx": 10891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455677781.507, "dur": 3.649, + "args": { + "External id": 472717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677782.521, "dur": 1.926, + "args": { + "External id": 472718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455677783.430, "dur": 0.879, + "args": { + "External id": 472719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455677790.514, "dur": 64072.629, + "args": { + "External id": 472720,"Record function id": 0, "Sequence number": 5036606, "Fwd thread id": 1, "Ev Idx": 10895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455677792.425, "dur": 64059.708, + "args": { + "External id": 472721,"Sequence number": 5036606, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10896 + } + }, + { + "ph": "f", "id": 437, "pid": 2338707, "tid": 2379456, "ts": 6267455677792.425, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2338707, "tid": 2379456, + "ts": 6267455677827.559, "dur": 47.910, + "args": { + "External id": 472722,"Record function id": 0, "Ev Idx": 10897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2338707, "tid": 2379456, + "ts": 6267455677884.717, "dur": 73.058, + "args": { + "External id": 472723,"Record function id": 0, "Ev Idx": 10898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2338707, "tid": 2379456, + "ts": 6267455677964.881, "dur": 63877.905, + "args": { + "External id": 472724,"Record function id": 0, "Ev Idx": 10899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455678112.152, "dur": 10.756, + "args": { + "External id": 472725,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455678135.757, "dur": 6.418, + "args": { + "External id": 472726,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455678161.944, "dur": 62492.204, + "args": { + "External id": 472727,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455678178.142, "dur": 62460.141, + "args": { + "External id": 472728,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455678286.545, "dur": 20.798, + "args": { + "External id": 472729,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455678331.830, "dur": 62227.594, + "args": { + "External id": 472730,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455678335.204, "dur": 62222.685, + "args": { + "External id": 472731,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455678341.339, "dur": 11.948, + "args": { + "External id": 472732,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455678355.933, "dur": 62193.819, + "args": { + "External id": 472733,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455740785.195, "dur": 18.358, + "args": { + "External id": 472734,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455740789.857, "dur": 13.147, + "args": { + "External id": 472735,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455740842.292, "dur": 528.250, + "args": { + "External id": 472736,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455740878.185, "dur": 485.002, + "args": { + "External id": 472737,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10912, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455740892.036, "dur": 463.130, + "args": { + "External id": 472738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455741398.898, "dur": 2.781, + "args": { + "External id": 472739,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10914, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455741478.892, "dur": 8.226, + "args": { + "External id": 472740,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455741501.177, "dur": 41.588, + "args": { + "External id": 472741,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455741555.305, "dur": 1.986, + "args": { + "External id": 472742,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455741563.527, "dur": 14.871, + "args": { + "External id": 472743,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455741585.332, "dur": 1.232, + "args": { + "External id": 472744,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455741608.883, "dur": 16.162, + "args": { + "External id": 472745,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455741634.065, "dur": 1.840, + "args": { + "External id": 472746,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455741641.104, "dur": 12.735, + "args": { + "External id": 472747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455741658.828, "dur": 0.991, + "args": { + "External id": 472748,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455741664.333, "dur": 11.986, + "args": { + "External id": 472749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455741681.183, "dur": 2.044, + "args": { + "External id": 472750,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455741687.762, "dur": 12.217, + "args": { + "External id": 472751,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455741704.658, "dur": 1.191, + "args": { + "External id": 472752,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455741710.449, "dur": 11.036, + "args": { + "External id": 472753,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455741728.059, "dur": 3.607, + "args": { + "External id": 472754,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455741736.132, "dur": 12.736, + "args": { + "External id": 472755,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455741753.134, "dur": 1.124, + "args": { + "External id": 472756,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455741759.053, "dur": 13.173, + "args": { + "External id": 472757,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455741881.695, "dur": 3493.367, + "args": { + "External id": 472758,"Record function id": 0, "Ev Idx": 10933 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2338707, "tid": 2379456, + "ts": 6267455741906.337, "dur": 1292.243, + "args": { + "External id": 472759,"Record function id": 0, "Ev Idx": 10934 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338707, "tid": 2379456, + "ts": 6267455741924.713, "dur": 429.469, + "args": { + "External id": 472760,"Record function id": 0, "Ev Idx": 10935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455742021.320, "dur": 4.604, + "args": { + "External id": 472761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455742029.433, "dur": 1.107, + "args": { + "External id": 472762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455742032.909, "dur": 1.137, + "args": { + "External id": 472763,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455742037.776, "dur": 0.904, + "args": { + "External id": 472764,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455742040.508, "dur": 0.808, + "args": { + "External id": 472765,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455742043.315, "dur": 1.026, + "args": { + "External id": 472766,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455742046.293, "dur": 3.439, + "args": { + "External id": 472767,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455742053.483, "dur": 1.784, + "args": { + "External id": 472768,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455742096.749, "dur": 1.687, + "args": { + "External id": 472769,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455742100.893, "dur": 0.732, + "args": { + "External id": 472770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455742123.608, "dur": 192.823, + "args": { + "External id": 472771,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455742143.370, "dur": 167.180, + "args": { + "External id": 472772,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455742173.245, "dur": 16.319, + "args": { + "External id": 472773,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455742194.129, "dur": 86.827, + "args": { + "External id": 472774,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455742197.300, "dur": 83.336, + "args": { + "External id": 472775,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742202.489, "dur": 6.951, + "args": { + "External id": 472776,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455742211.329, "dur": 65.275, + "args": { + "External id": 472777,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10952 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2338707, "tid": 2379456, + "ts": 6267455742449.941, "dur": 739.553, + "args": { + "External id": 472778,"Record function id": 0, "Ev Idx": 10953 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338707, "tid": 2379456, + "ts": 6267455742470.227, "dur": 704.470, + "args": { + "External id": 472779,"Record function id": 0, "Ev Idx": 10954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455742538.459, "dur": 5.850, + "args": { + "External id": 472780,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455742562.167, "dur": 54.512, + "args": { + "External id": 472781,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742568.779, "dur": 2.016, + "args": { + "External id": 472782,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742572.798, "dur": 3.301, + "args": { + "External id": 472783,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742578.091, "dur": 0.787, + "args": { + "External id": 472784,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742580.530, "dur": 0.704, + "args": { + "External id": 472785,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742582.801, "dur": 0.695, + "args": { + "External id": 472786,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742585.209, "dur": 0.600, + "args": { + "External id": 472787,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742587.834, "dur": 0.422, + "args": { + "External id": 472788,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742589.694, "dur": 0.820, + "args": { + "External id": 472789,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742610.387, "dur": 1.193, + "args": { + "External id": 472790,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455742629.951, "dur": 51.763, + "args": { + "External id": 472791,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455742720.348, "dur": 130.391, + "args": { + "External id": 472792,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455742733.778, "dur": 6.648, + "args": { + "External id": 472793,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455742746.865, "dur": 12.350, + "args": { + "External id": 472794,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455742752.374, "dur": 6.388, + "args": { + "External id": 472795,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742756.578, "dur": 0.584, + "args": { + "External id": 472796,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455742767.721, "dur": 28.495, + "args": { + "External id": 472797,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742770.234, "dur": 0.572, + "args": { + "External id": 472798,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742772.299, "dur": 0.664, + "args": { + "External id": 472799,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742774.567, "dur": 0.606, + "args": { + "External id": 472800,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742776.995, "dur": 0.280, + "args": { + "External id": 472801,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742778.408, "dur": 0.411, + "args": { + "External id": 472802,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742780.254, "dur": 0.528, + "args": { + "External id": 472803,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742785.750, "dur": 2.384, + "args": { + "External id": 472804,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742789.755, "dur": 0.791, + "args": { + "External id": 472805,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455742792.436, "dur": 0.699, + "args": { + "External id": 472806,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455742807.919, "dur": 33.963, + "args": { + "External id": 472807,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455742904.746, "dur": 140.663, + "args": { + "External id": 472808,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455742935.619, "dur": 105.558, + "args": { + "External id": 472809,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10984, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455742951.275, "dur": 84.653, + "args": { + "External id": 472810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455743104.357, "dur": 3.666, + "args": { + "External id": 472811,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10986, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455743207.523, "dur": 2142.655, + "args": { + "External id": 472812,"Sequence number": 5036605, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10987 + } + }, + { + "ph": "f", "id": 438, "pid": 2338707, "tid": 2379456, "ts": 6267455743207.523, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455743343.483, "dur": 131.529, + "args": { + "External id": 472813,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455743525.905, "dur": 47.049, + "args": { + "External id": 472814,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455743619.694, "dur": 68.691, + "args": { + "External id": 472815,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455743706.937, "dur": 39.051, + "args": { + "External id": 472816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455743753.585, "dur": 39.900, + "args": { + "External id": 472817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455743803.591, "dur": 32.470, + "args": { + "External id": 472818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455743843.274, "dur": 34.097, + "args": { + "External id": 472819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455743909.385, "dur": 31.114, + "args": { + "External id": 472820,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455743962.446, "dur": 33.473, + "args": { + "External id": 472821,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455744021.244, "dur": 23.417, + "args": { + "External id": 472822,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455744114.930, "dur": 25.613, + "args": { + "External id": 472823,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455744156.212, "dur": 53.430, + "args": { + "External id": 472824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455744214.508, "dur": 39.356, + "args": { + "External id": 472825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455744288.390, "dur": 299.400, + "args": { + "External id": 472826,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455744380.150, "dur": 8.164, + "args": { + "External id": 472827,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455744390.799, "dur": 2.900, + "args": { + "External id": 472828,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455744395.146, "dur": 2.350, + "args": { + "External id": 472829,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455744398.862, "dur": 5.114, + "args": { + "External id": 472830,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455744456.194, "dur": 6.038, + "args": { + "External id": 472831,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455744458.305, "dur": 3.700, + "args": { + "External id": 472832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455744464.254, "dur": 37.680, + "args": { + "External id": 472833,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455744471.019, "dur": 2.021, + "args": { + "External id": 472834,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455744503.778, "dur": 2.280, + "args": { + "External id": 472835,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455744505.311, "dur": 0.651, + "args": { + "External id": 472836,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455744507.300, "dur": 17.197, + "args": { + "External id": 472837,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455744509.783, "dur": 1.110, + "args": { + "External id": 472838,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455744663.459, "dur": 43.648, + "args": { + "External id": 472839,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455744727.074, "dur": 20.679, + "args": { + "External id": 472840,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455744758.407, "dur": 61.527, + "args": { + "External id": 472841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455744828.285, "dur": 48.481, + "args": { + "External id": 472842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455744889.408, "dur": 26.606, + "args": { + "External id": 472843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455744923.650, "dur": 39.259, + "args": { + "External id": 472844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455744971.709, "dur": 37.971, + "args": { + "External id": 472845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455745018.258, "dur": 78.777, + "args": { + "External id": 472846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455745124.340, "dur": 31.460, + "args": { + "External id": 472847,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455745174.723, "dur": 29.612, + "args": { + "External id": 472848,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455745220.415, "dur": 21.474, + "args": { + "External id": 472849,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455745257.965, "dur": 18.884, + "args": { + "External id": 472850,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455745290.711, "dur": 20.627, + "args": { + "External id": 472851,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745401.782, "dur": 18.458, + "args": { + "External id": 472852,"Record function id": 0, "Ev Idx": 11027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745405.786, "dur": 13.169, + "args": { + "External id": 472853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745410.857, "dur": 6.958, + "args": { + "External id": 472854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745413.015, "dur": 4.643, + "args": { + "External id": 472855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745424.974, "dur": 5.655, + "args": { + "External id": 472856,"Record function id": 0, "Ev Idx": 11031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745426.480, "dur": 3.646, + "args": { + "External id": 472857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745427.373, "dur": 2.036, + "args": { + "External id": 472858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745428.282, "dur": 1.013, + "args": { + "External id": 472859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745434.756, "dur": 4.952, + "args": { + "External id": 472860,"Record function id": 0, "Ev Idx": 11035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745436.022, "dur": 3.182, + "args": { + "External id": 472861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745436.694, "dur": 1.910, + "args": { + "External id": 472862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745437.411, "dur": 1.118, + "args": { + "External id": 472863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745443.564, "dur": 4.896, + "args": { + "External id": 472864,"Record function id": 0, "Ev Idx": 11039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745444.818, "dur": 3.134, + "args": { + "External id": 472865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745445.380, "dur": 2.120, + "args": { + "External id": 472866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745446.070, "dur": 1.350, + "args": { + "External id": 472867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745452.292, "dur": 5.607, + "args": { + "External id": 472868,"Record function id": 0, "Ev Idx": 11043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745454.091, "dur": 3.332, + "args": { + "External id": 472869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745454.946, "dur": 1.893, + "args": { + "External id": 472870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745455.821, "dur": 0.944, + "args": { + "External id": 472871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745461.643, "dur": 7.212, + "args": { + "External id": 472872,"Record function id": 0, "Ev Idx": 11047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745463.067, "dur": 5.261, + "args": { + "External id": 472873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745463.841, "dur": 3.976, + "args": { + "External id": 472874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745464.534, "dur": 3.179, + "args": { + "External id": 472875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745472.667, "dur": 4.522, + "args": { + "External id": 472876,"Record function id": 0, "Ev Idx": 11051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745474.331, "dur": 2.378, + "args": { + "External id": 472877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745475.012, "dur": 1.241, + "args": { + "External id": 472878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745475.352, "dur": 0.804, + "args": { + "External id": 472879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745480.862, "dur": 3.968, + "args": { + "External id": 472880,"Record function id": 0, "Ev Idx": 11055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745481.980, "dur": 2.369, + "args": { + "External id": 472881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745482.545, "dur": 1.263, + "args": { + "External id": 472882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745482.873, "dur": 0.847, + "args": { + "External id": 472883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745488.792, "dur": 4.408, + "args": { + "External id": 472884,"Record function id": 0, "Ev Idx": 11059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455745490.031, "dur": 2.667, + "args": { + "External id": 472885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745490.660, "dur": 1.548, + "args": { + "External id": 472886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455745491.370, "dur": 0.698, + "args": { + "External id": 472887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455745498.483, "dur": 62770.000, + "args": { + "External id": 472888,"Record function id": 0, "Sequence number": 5036604, "Fwd thread id": 1, "Ev Idx": 11063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455745499.837, "dur": 62758.103, + "args": { + "External id": 472889,"Sequence number": 5036604, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11064 + } + }, + { + "ph": "f", "id": 439, "pid": 2338707, "tid": 2379456, "ts": 6267455745499.837, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2338707, "tid": 2379456, + "ts": 6267455745535.442, "dur": 45.848, + "args": { + "External id": 472890,"Record function id": 0, "Ev Idx": 11065 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2338707, "tid": 2379456, + "ts": 6267455745589.982, "dur": 97.742, + "args": { + "External id": 472891,"Record function id": 0, "Ev Idx": 11066 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2338707, "tid": 2379456, + "ts": 6267455745697.072, "dur": 62551.931, + "args": { + "External id": 472892,"Record function id": 0, "Ev Idx": 11067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455745809.146, "dur": 9.128, + "args": { + "External id": 472893,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455745831.091, "dur": 5.762, + "args": { + "External id": 472894,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455745855.503, "dur": 61330.027, + "args": { + "External id": 472895,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455745872.139, "dur": 61296.723, + "args": { + "External id": 472896,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455745988.386, "dur": 21.508, + "args": { + "External id": 472897,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455746036.127, "dur": 61074.656, + "args": { + "External id": 472898,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455746039.840, "dur": 61069.729, + "args": { + "External id": 472899,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455746045.491, "dur": 56.327, + "args": { + "External id": 472900,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455746106.485, "dur": 60995.590, + "args": { + "External id": 472901,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455807335.151, "dur": 18.090, + "args": { + "External id": 472902,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455807340.479, "dur": 12.259, + "args": { + "External id": 472903,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455807390.213, "dur": 362.554, + "args": { + "External id": 472904,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455807426.453, "dur": 319.586, + "args": { + "External id": 472905,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11080, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455807442.558, "dur": 296.794, + "args": { + "External id": 472906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455807778.705, "dur": 2.842, + "args": { + "External id": 472907,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11082, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455807850.703, "dur": 8.052, + "args": { + "External id": 472908,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455807874.174, "dur": 43.849, + "args": { + "External id": 472909,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455807930.474, "dur": 4.433, + "args": { + "External id": 472910,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455807940.629, "dur": 17.389, + "args": { + "External id": 472911,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455807964.393, "dur": 1.101, + "args": { + "External id": 472912,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455807970.454, "dur": 12.988, + "args": { + "External id": 472913,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455807989.626, "dur": 0.873, + "args": { + "External id": 472914,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455807994.803, "dur": 14.726, + "args": { + "External id": 472915,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808015.023, "dur": 0.955, + "args": { + "External id": 472916,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455808020.603, "dur": 12.460, + "args": { + "External id": 472917,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808037.786, "dur": 1.517, + "args": { + "External id": 472918,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455808043.613, "dur": 50.456, + "args": { + "External id": 472919,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808104.697, "dur": 2.280, + "args": { + "External id": 472920,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455808112.827, "dur": 16.971, + "args": { + "External id": 472921,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808135.422, "dur": 1.204, + "args": { + "External id": 472922,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455808141.570, "dur": 14.032, + "args": { + "External id": 472923,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808160.282, "dur": 1.434, + "args": { + "External id": 472924,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455808166.229, "dur": 12.266, + "args": { + "External id": 472925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455808288.174, "dur": 3494.743, + "args": { + "External id": 472926,"Record function id": 0, "Ev Idx": 11101 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2338707, "tid": 2379456, + "ts": 6267455808315.163, "dur": 1260.616, + "args": { + "External id": 472927,"Record function id": 0, "Ev Idx": 11102 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338707, "tid": 2379456, + "ts": 6267455808335.242, "dur": 414.548, + "args": { + "External id": 472928,"Record function id": 0, "Ev Idx": 11103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455808437.006, "dur": 7.452, + "args": { + "External id": 472929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455808448.056, "dur": 1.188, + "args": { + "External id": 472930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455808451.870, "dur": 1.270, + "args": { + "External id": 472931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455808455.352, "dur": 1.180, + "args": { + "External id": 472932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455808460.479, "dur": 1.132, + "args": { + "External id": 472933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455808463.316, "dur": 0.807, + "args": { + "External id": 472934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455808466.052, "dur": 0.993, + "args": { + "External id": 472935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455808468.793, "dur": 1.937, + "args": { + "External id": 472936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455808474.309, "dur": 3.214, + "args": { + "External id": 472937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455808479.457, "dur": 0.857, + "args": { + "External id": 472938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455808501.283, "dur": 208.692, + "args": { + "External id": 472939,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455808520.753, "dur": 183.199, + "args": { + "External id": 472940,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455808549.639, "dur": 16.401, + "args": { + "External id": 472941,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455808570.834, "dur": 99.099, + "args": { + "External id": 472942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455808574.175, "dur": 95.167, + "args": { + "External id": 472943,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808579.429, "dur": 6.221, + "args": { + "External id": 472944,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455808588.045, "dur": 80.162, + "args": { + "External id": 472945,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11120 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2338707, "tid": 2379456, + "ts": 6267455808846.623, "dur": 720.261, + "args": { + "External id": 472946,"Record function id": 0, "Ev Idx": 11121 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338707, "tid": 2379456, + "ts": 6267455808868.345, "dur": 683.483, + "args": { + "External id": 472947,"Record function id": 0, "Ev Idx": 11122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455808938.089, "dur": 6.985, + "args": { + "External id": 472948,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455808963.534, "dur": 34.833, + "args": { + "External id": 472949,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808970.251, "dur": 1.783, + "args": { + "External id": 472950,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808974.021, "dur": 0.889, + "args": { + "External id": 472951,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808977.333, "dur": 0.703, + "args": { + "External id": 472952,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808979.581, "dur": 3.128, + "args": { + "External id": 472953,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808983.882, "dur": 0.773, + "args": { + "External id": 472954,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808986.867, "dur": 0.377, + "args": { + "External id": 472955,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808989.165, "dur": 0.307, + "args": { + "External id": 472956,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808990.924, "dur": 0.550, + "args": { + "External id": 472957,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455808994.051, "dur": 0.440, + "args": { + "External id": 472958,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455809010.563, "dur": 86.786, + "args": { + "External id": 472959,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455809139.386, "dur": 144.083, + "args": { + "External id": 472960,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455809153.926, "dur": 6.038, + "args": { + "External id": 472961,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455809169.354, "dur": 13.214, + "args": { + "External id": 472962,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455809174.809, "dur": 7.302, + "args": { + "External id": 472963,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455809179.011, "dur": 0.888, + "args": { + "External id": 472964,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455809191.463, "dur": 30.322, + "args": { + "External id": 472965,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455809193.804, "dur": 3.187, + "args": { + "External id": 472966,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455809198.759, "dur": 0.553, + "args": { + "External id": 472967,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455809201.704, "dur": 0.536, + "args": { + "External id": 472968,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455809204.524, "dur": 0.484, + "args": { + "External id": 472969,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455809206.105, "dur": 0.517, + "args": { + "External id": 472970,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455809208.102, "dur": 0.406, + "args": { + "External id": 472971,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455809210.218, "dur": 0.622, + "args": { + "External id": 472972,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455809211.889, "dur": 0.405, + "args": { + "External id": 472973,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455809214.007, "dur": 3.691, + "args": { + "External id": 472974,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455809238.864, "dur": 35.254, + "args": { + "External id": 472975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455809337.880, "dur": 132.074, + "args": { + "External id": 472976,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455809365.259, "dur": 100.565, + "args": { + "External id": 472977,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11152, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455809378.338, "dur": 82.297, + "args": { + "External id": 472978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455809488.096, "dur": 2.003, + "args": { + "External id": 472979,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11154, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455809584.615, "dur": 2173.898, + "args": { + "External id": 472980,"Sequence number": 5036603, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11155 + } + }, + { + "ph": "f", "id": 440, "pid": 2338707, "tid": 2379456, "ts": 6267455809584.615, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455809743.947, "dur": 132.442, + "args": { + "External id": 472981,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455809930.040, "dur": 49.710, + "args": { + "External id": 472982,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455810002.279, "dur": 110.438, + "args": { + "External id": 472983,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455810129.742, "dur": 44.989, + "args": { + "External id": 472984,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455810182.685, "dur": 39.932, + "args": { + "External id": 472985,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455810230.648, "dur": 32.582, + "args": { + "External id": 472986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455810273.698, "dur": 35.938, + "args": { + "External id": 472987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455810343.209, "dur": 31.278, + "args": { + "External id": 472988,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455810398.063, "dur": 34.659, + "args": { + "External id": 472989,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455810459.633, "dur": 23.939, + "args": { + "External id": 472990,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455810499.068, "dur": 18.033, + "args": { + "External id": 472991,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455810529.293, "dur": 46.255, + "args": { + "External id": 472992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455810580.265, "dur": 58.420, + "args": { + "External id": 472993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455810676.976, "dur": 329.718, + "args": { + "External id": 472994,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455810788.482, "dur": 8.935, + "args": { + "External id": 472995,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455810805.549, "dur": 3.098, + "args": { + "External id": 472996,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455810809.941, "dur": 2.673, + "args": { + "External id": 472997,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455810814.047, "dur": 4.668, + "args": { + "External id": 472998,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455810876.695, "dur": 6.111, + "args": { + "External id": 472999,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455810878.984, "dur": 3.603, + "args": { + "External id": 473000,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455810884.731, "dur": 44.107, + "args": { + "External id": 473001,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455810891.402, "dur": 2.145, + "args": { + "External id": 473002,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455810930.631, "dur": 1.952, + "args": { + "External id": 473003,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455810931.863, "dur": 0.593, + "args": { + "External id": 473004,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455810933.629, "dur": 18.682, + "args": { + "External id": 473005,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455810935.927, "dur": 0.760, + "args": { + "External id": 473006,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455811051.952, "dur": 75.612, + "args": { + "External id": 473007,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455811152.940, "dur": 23.335, + "args": { + "External id": 473008,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455811187.537, "dur": 62.703, + "args": { + "External id": 473009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455811258.961, "dur": 59.987, + "args": { + "External id": 473010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455811332.590, "dur": 26.846, + "args": { + "External id": 473011,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455811366.337, "dur": 42.453, + "args": { + "External id": 473012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455811417.095, "dur": 34.585, + "args": { + "External id": 473013,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455811459.760, "dur": 35.778, + "args": { + "External id": 473014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455811517.407, "dur": 27.351, + "args": { + "External id": 473015,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455811562.235, "dur": 27.540, + "args": { + "External id": 473016,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455811625.811, "dur": 23.153, + "args": { + "External id": 473017,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455811667.345, "dur": 17.934, + "args": { + "External id": 473018,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455811699.429, "dur": 18.382, + "args": { + "External id": 473019,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811809.727, "dur": 18.478, + "args": { + "External id": 473020,"Record function id": 0, "Ev Idx": 11195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811813.372, "dur": 13.586, + "args": { + "External id": 473021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811818.719, "dur": 7.182, + "args": { + "External id": 473022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811820.845, "dur": 4.926, + "args": { + "External id": 473023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811832.902, "dur": 6.374, + "args": { + "External id": 473024,"Record function id": 0, "Ev Idx": 11199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811834.371, "dur": 4.328, + "args": { + "External id": 473025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811835.379, "dur": 2.587, + "args": { + "External id": 473026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811836.648, "dur": 1.204, + "args": { + "External id": 473027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811843.174, "dur": 5.069, + "args": { + "External id": 473028,"Record function id": 0, "Ev Idx": 11203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811844.672, "dur": 3.087, + "args": { + "External id": 473029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811845.492, "dur": 1.722, + "args": { + "External id": 473030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811846.096, "dur": 1.035, + "args": { + "External id": 473031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811851.975, "dur": 4.026, + "args": { + "External id": 473032,"Record function id": 0, "Ev Idx": 11207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811853.159, "dur": 2.379, + "args": { + "External id": 473033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811853.904, "dur": 1.155, + "args": { + "External id": 473034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811854.312, "dur": 0.669, + "args": { + "External id": 473035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811859.612, "dur": 7.201, + "args": { + "External id": 473036,"Record function id": 0, "Ev Idx": 11211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811860.821, "dur": 5.518, + "args": { + "External id": 473037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811861.591, "dur": 4.105, + "args": { + "External id": 473038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811862.261, "dur": 3.351, + "args": { + "External id": 473039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811870.691, "dur": 5.288, + "args": { + "External id": 473040,"Record function id": 0, "Ev Idx": 11215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811872.224, "dur": 3.261, + "args": { + "External id": 473041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811873.233, "dur": 1.744, + "args": { + "External id": 473042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811874.052, "dur": 0.794, + "args": { + "External id": 473043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811879.747, "dur": 4.054, + "args": { + "External id": 473044,"Record function id": 0, "Ev Idx": 11219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811881.081, "dur": 2.201, + "args": { + "External id": 473045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811881.662, "dur": 1.168, + "args": { + "External id": 473046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811881.999, "dur": 0.747, + "args": { + "External id": 473047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811887.415, "dur": 4.310, + "args": { + "External id": 473048,"Record function id": 0, "Ev Idx": 11223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811888.697, "dur": 2.534, + "args": { + "External id": 473049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811889.248, "dur": 1.351, + "args": { + "External id": 473050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811889.601, "dur": 0.887, + "args": { + "External id": 473051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811895.854, "dur": 4.565, + "args": { + "External id": 473052,"Record function id": 0, "Ev Idx": 11227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455811897.065, "dur": 2.874, + "args": { + "External id": 473053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811897.642, "dur": 1.806, + "args": { + "External id": 473054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455811898.479, "dur": 0.840, + "args": { + "External id": 473055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455811905.556, "dur": 59143.448, + "args": { + "External id": 473056,"Record function id": 0, "Sequence number": 5036602, "Fwd thread id": 1, "Ev Idx": 11231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455811907.399, "dur": 59126.906, + "args": { + "External id": 473057,"Sequence number": 5036602, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11232 + } + }, + { + "ph": "f", "id": 441, "pid": 2338707, "tid": 2379456, "ts": 6267455811907.399, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2338707, "tid": 2379456, + "ts": 6267455811942.804, "dur": 46.558, + "args": { + "External id": 473058,"Record function id": 0, "Ev Idx": 11233 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2338707, "tid": 2379456, + "ts": 6267455811999.668, "dur": 120.572, + "args": { + "External id": 473059,"Record function id": 0, "Ev Idx": 11234 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2338707, "tid": 2379456, + "ts": 6267455812131.261, "dur": 58893.000, + "args": { + "External id": 473060,"Record function id": 0, "Ev Idx": 11235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455812244.067, "dur": 9.426, + "args": { + "External id": 473061,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455812265.835, "dur": 5.925, + "args": { + "External id": 473062,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455812289.847, "dur": 57504.613, + "args": { + "External id": 473063,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455812306.785, "dur": 57470.798, + "args": { + "External id": 473064,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455812413.770, "dur": 20.617, + "args": { + "External id": 473065,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455812458.525, "dur": 57259.186, + "args": { + "External id": 473066,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455812461.964, "dur": 57254.428, + "args": { + "External id": 473067,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455812467.867, "dur": 11.010, + "args": { + "External id": 473068,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455812481.795, "dur": 57227.252, + "args": { + "External id": 473069,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455869938.070, "dur": 16.808, + "args": { + "External id": 473070,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455869943.593, "dur": 10.825, + "args": { + "External id": 473071,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455869996.601, "dur": 493.585, + "args": { + "External id": 473072,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455870037.545, "dur": 444.218, + "args": { + "External id": 473073,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11248, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455870086.186, "dur": 388.087, + "args": { + "External id": 473074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455870519.171, "dur": 2.636, + "args": { + "External id": 473075,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11250, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455870617.905, "dur": 9.480, + "args": { + "External id": 473076,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455870645.644, "dur": 47.923, + "args": { + "External id": 473077,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455870708.106, "dur": 4.911, + "args": { + "External id": 473078,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455870719.556, "dur": 17.218, + "args": { + "External id": 473079,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455870743.330, "dur": 1.064, + "args": { + "External id": 473080,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455870750.720, "dur": 14.112, + "args": { + "External id": 473081,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455870778.315, "dur": 0.885, + "args": { + "External id": 473082,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455870784.213, "dur": 12.907, + "args": { + "External id": 473083,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455870802.837, "dur": 0.938, + "args": { + "External id": 473084,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455870808.699, "dur": 11.141, + "args": { + "External id": 473085,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455870824.827, "dur": 1.253, + "args": { + "External id": 473086,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455870830.759, "dur": 12.289, + "args": { + "External id": 473087,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455870849.991, "dur": 1.106, + "args": { + "External id": 473088,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455870877.071, "dur": 12.692, + "args": { + "External id": 473089,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455870905.194, "dur": 0.973, + "args": { + "External id": 473090,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455870910.881, "dur": 14.729, + "args": { + "External id": 473091,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455870931.654, "dur": 0.996, + "args": { + "External id": 473092,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455870937.183, "dur": 13.273, + "args": { + "External id": 473093,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455871113.501, "dur": 3505.047, + "args": { + "External id": 473094,"Record function id": 0, "Ev Idx": 11269 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2338707, "tid": 2379456, + "ts": 6267455871144.919, "dur": 1284.807, + "args": { + "External id": 473095,"Record function id": 0, "Ev Idx": 11270 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338707, "tid": 2379456, + "ts": 6267455871164.648, "dur": 388.653, + "args": { + "External id": 473096,"Record function id": 0, "Ev Idx": 11271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455871264.253, "dur": 8.437, + "args": { + "External id": 473097,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455871276.690, "dur": 0.897, + "args": { + "External id": 473098,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455871280.395, "dur": 0.929, + "args": { + "External id": 473099,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455871283.200, "dur": 0.812, + "args": { + "External id": 473100,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455871286.012, "dur": 0.949, + "args": { + "External id": 473101,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455871288.770, "dur": 0.795, + "args": { + "External id": 473102,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455871291.210, "dur": 1.158, + "args": { + "External id": 473103,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455871296.410, "dur": 0.872, + "args": { + "External id": 473104,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455871298.638, "dur": 3.141, + "args": { + "External id": 473105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455871303.114, "dur": 1.110, + "args": { + "External id": 473106,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455871325.280, "dur": 190.967, + "args": { + "External id": 473107,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455871344.442, "dur": 165.724, + "args": { + "External id": 473108,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455871373.495, "dur": 18.389, + "args": { + "External id": 473109,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455871396.082, "dur": 83.475, + "args": { + "External id": 473110,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455871401.273, "dur": 77.800, + "args": { + "External id": 473111,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871405.867, "dur": 7.967, + "args": { + "External id": 473112,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455871415.739, "dur": 62.416, + "args": { + "External id": 473113,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11288 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2338707, "tid": 2379456, + "ts": 6267455871681.778, "dur": 737.925, + "args": { + "External id": 473114,"Record function id": 0, "Ev Idx": 11289 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338707, "tid": 2379456, + "ts": 6267455871703.820, "dur": 700.911, + "args": { + "External id": 473115,"Record function id": 0, "Ev Idx": 11290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455871779.873, "dur": 7.190, + "args": { + "External id": 473116,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455871806.542, "dur": 33.162, + "args": { + "External id": 473117,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871813.407, "dur": 2.113, + "args": { + "External id": 473118,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871817.736, "dur": 0.933, + "args": { + "External id": 473119,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871820.811, "dur": 0.527, + "args": { + "External id": 473120,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871822.640, "dur": 2.894, + "args": { + "External id": 473121,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871827.522, "dur": 0.428, + "args": { + "External id": 473122,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871829.582, "dur": 0.390, + "args": { + "External id": 473123,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871831.149, "dur": 0.521, + "args": { + "External id": 473124,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871833.355, "dur": 0.706, + "args": { + "External id": 473125,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871835.839, "dur": 0.403, + "args": { + "External id": 473126,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455871852.723, "dur": 50.630, + "args": { + "External id": 473127,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455871938.529, "dur": 174.005, + "args": { + "External id": 473128,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455871950.404, "dur": 4.183, + "args": { + "External id": 473129,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455871960.380, "dur": 11.852, + "args": { + "External id": 473130,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455871965.198, "dur": 6.581, + "args": { + "External id": 473131,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871969.702, "dur": 0.697, + "args": { + "External id": 473132,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455871980.247, "dur": 31.705, + "args": { + "External id": 473133,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871982.489, "dur": 3.510, + "args": { + "External id": 473134,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871987.673, "dur": 0.883, + "args": { + "External id": 473135,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871989.824, "dur": 0.383, + "args": { + "External id": 473136,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871992.150, "dur": 0.757, + "args": { + "External id": 473137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871994.704, "dur": 0.409, + "args": { + "External id": 473138,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871996.093, "dur": 0.954, + "args": { + "External id": 473139,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455871998.565, "dur": 0.682, + "args": { + "External id": 473140,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455872000.749, "dur": 0.427, + "args": { + "External id": 473141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455872002.400, "dur": 3.083, + "args": { + "External id": 473142,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455872022.771, "dur": 78.609, + "args": { + "External id": 473143,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455872173.626, "dur": 145.812, + "args": { + "External id": 473144,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455872207.745, "dur": 107.606, + "args": { + "External id": 473145,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11320, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455872219.738, "dur": 90.640, + "args": { + "External id": 473146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455872340.292, "dur": 2.349, + "args": { + "External id": 473147,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11322, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455872438.166, "dur": 2139.488, + "args": { + "External id": 473148,"Sequence number": 5036601, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11323 + } + }, + { + "ph": "f", "id": 442, "pid": 2338707, "tid": 2379456, "ts": 6267455872438.166, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455872568.190, "dur": 150.390, + "args": { + "External id": 473149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455872775.699, "dur": 49.690, + "args": { + "External id": 473150,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455872849.433, "dur": 61.694, + "args": { + "External id": 473151,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455872922.526, "dur": 38.389, + "args": { + "External id": 473152,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455872968.149, "dur": 39.684, + "args": { + "External id": 473153,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455873016.152, "dur": 32.734, + "args": { + "External id": 473154,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455873102.449, "dur": 43.078, + "args": { + "External id": 473155,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455873181.631, "dur": 31.833, + "args": { + "External id": 473156,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455873234.724, "dur": 34.229, + "args": { + "External id": 473157,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455873293.262, "dur": 24.339, + "args": { + "External id": 473158,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455873333.620, "dur": 24.071, + "args": { + "External id": 473159,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455873370.512, "dur": 49.249, + "args": { + "External id": 473160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455873424.491, "dur": 60.159, + "args": { + "External id": 473161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455873528.802, "dur": 332.094, + "args": { + "External id": 473162,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455873648.292, "dur": 8.587, + "args": { + "External id": 473163,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455873660.202, "dur": 3.097, + "args": { + "External id": 473164,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455873664.865, "dur": 2.823, + "args": { + "External id": 473165,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455873669.524, "dur": 4.795, + "args": { + "External id": 473166,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455873734.718, "dur": 6.264, + "args": { + "External id": 473167,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455873737.061, "dur": 3.683, + "args": { + "External id": 473168,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455873743.305, "dur": 40.037, + "args": { + "External id": 473169,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455873749.784, "dur": 2.296, + "args": { + "External id": 473170,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455873785.200, "dur": 2.034, + "args": { + "External id": 473171,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455873786.382, "dur": 0.756, + "args": { + "External id": 473172,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455873788.628, "dur": 19.361, + "args": { + "External id": 473173,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455873791.314, "dur": 1.031, + "args": { + "External id": 473174,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455873901.425, "dur": 34.521, + "args": { + "External id": 473175,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455873954.988, "dur": 24.661, + "args": { + "External id": 473176,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455873989.629, "dur": 62.789, + "args": { + "External id": 473177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455874103.886, "dur": 59.044, + "args": { + "External id": 473178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455874177.993, "dur": 27.038, + "args": { + "External id": 473179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455874212.664, "dur": 38.754, + "args": { + "External id": 473180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455874260.330, "dur": 32.470, + "args": { + "External id": 473181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455874301.455, "dur": 35.481, + "args": { + "External id": 473182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455874360.824, "dur": 28.831, + "args": { + "External id": 473183,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455874408.442, "dur": 27.599, + "args": { + "External id": 473184,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455874451.393, "dur": 20.745, + "args": { + "External id": 473185,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455874488.934, "dur": 17.283, + "args": { + "External id": 473186,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455874520.334, "dur": 18.385, + "args": { + "External id": 473187,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874647.058, "dur": 19.670, + "args": { + "External id": 473188,"Record function id": 0, "Ev Idx": 11363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874651.494, "dur": 14.047, + "args": { + "External id": 473189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874656.777, "dur": 7.361, + "args": { + "External id": 473190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874659.098, "dur": 4.905, + "args": { + "External id": 473191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874671.484, "dur": 5.937, + "args": { + "External id": 473192,"Record function id": 0, "Ev Idx": 11367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874673.104, "dur": 3.764, + "args": { + "External id": 473193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874673.979, "dur": 2.206, + "args": { + "External id": 473194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874674.976, "dur": 1.127, + "args": { + "External id": 473195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874681.192, "dur": 5.453, + "args": { + "External id": 473196,"Record function id": 0, "Ev Idx": 11371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874682.768, "dur": 3.373, + "args": { + "External id": 473197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874683.891, "dur": 1.693, + "args": { + "External id": 473198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874684.500, "dur": 0.997, + "args": { + "External id": 473199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874690.643, "dur": 5.108, + "args": { + "External id": 473200,"Record function id": 0, "Ev Idx": 11375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874692.448, "dur": 2.821, + "args": { + "External id": 473201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874693.044, "dur": 1.733, + "args": { + "External id": 473202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874693.888, "dur": 0.810, + "args": { + "External id": 473203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874710.920, "dur": 7.645, + "args": { + "External id": 473204,"Record function id": 0, "Ev Idx": 11379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874712.289, "dur": 5.752, + "args": { + "External id": 473205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874713.143, "dur": 4.044, + "args": { + "External id": 473206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874713.953, "dur": 3.159, + "args": { + "External id": 473207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874722.424, "dur": 7.614, + "args": { + "External id": 473208,"Record function id": 0, "Ev Idx": 11383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874724.354, "dur": 5.183, + "args": { + "External id": 473209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874724.899, "dur": 4.150, + "args": { + "External id": 473210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874727.725, "dur": 1.178, + "args": { + "External id": 473211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874733.875, "dur": 5.176, + "args": { + "External id": 473212,"Record function id": 0, "Ev Idx": 11387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874735.565, "dur": 2.880, + "args": { + "External id": 473213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874736.412, "dur": 1.541, + "args": { + "External id": 473214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874737.160, "dur": 0.714, + "args": { + "External id": 473215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874742.665, "dur": 5.148, + "args": { + "External id": 473216,"Record function id": 0, "Ev Idx": 11391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874744.485, "dur": 2.798, + "args": { + "External id": 473217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874745.005, "dur": 1.813, + "args": { + "External id": 473218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874745.654, "dur": 1.053, + "args": { + "External id": 473219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874752.312, "dur": 4.862, + "args": { + "External id": 473220,"Record function id": 0, "Ev Idx": 11395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455874753.694, "dur": 2.988, + "args": { + "External id": 473221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874754.984, "dur": 1.242, + "args": { + "External id": 473222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455874755.493, "dur": 0.659, + "args": { + "External id": 473223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455874762.276, "dur": 61092.149, + "args": { + "External id": 473224,"Record function id": 0, "Sequence number": 5036600, "Fwd thread id": 1, "Ev Idx": 11399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455874764.018, "dur": 61079.041, + "args": { + "External id": 473225,"Sequence number": 5036600, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11400 + } + }, + { + "ph": "f", "id": 443, "pid": 2338707, "tid": 2379456, "ts": 6267455874764.018, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2338707, "tid": 2379456, + "ts": 6267455874800.387, "dur": 46.985, + "args": { + "External id": 473226,"Record function id": 0, "Ev Idx": 11401 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2338707, "tid": 2379456, + "ts": 6267455874857.288, "dur": 75.013, + "args": { + "External id": 473227,"Record function id": 0, "Ev Idx": 11402 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2338707, "tid": 2379456, + "ts": 6267455874939.126, "dur": 60895.231, + "args": { + "External id": 473228,"Record function id": 0, "Ev Idx": 11403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455875047.782, "dur": 48.578, + "args": { + "External id": 473229,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455875113.148, "dur": 5.829, + "args": { + "External id": 473230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455875139.451, "dur": 59508.505, + "args": { + "External id": 473231,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455875156.796, "dur": 59474.680, + "args": { + "External id": 473232,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455875278.773, "dur": 28.582, + "args": { + "External id": 473233,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455875331.681, "dur": 59236.080, + "args": { + "External id": 473234,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455875335.371, "dur": 59231.345, + "args": { + "External id": 473235,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455875341.172, "dur": 17.064, + "args": { + "External id": 473236,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455875360.798, "dur": 59201.378, + "args": { + "External id": 473237,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455934768.370, "dur": 13.250, + "args": { + "External id": 473238,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455934772.503, "dur": 8.623, + "args": { + "External id": 473239,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455934818.361, "dur": 533.860, + "args": { + "External id": 473240,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455934853.961, "dur": 490.798, + "args": { + "External id": 473241,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11416, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455934868.156, "dur": 467.841, + "args": { + "External id": 473242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455935382.026, "dur": 2.935, + "args": { + "External id": 473243,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11418, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455935463.105, "dur": 9.472, + "args": { + "External id": 473244,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455935486.616, "dur": 43.996, + "args": { + "External id": 473245,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455935542.908, "dur": 2.361, + "args": { + "External id": 473246,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455935551.439, "dur": 16.265, + "args": { + "External id": 473247,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455935574.692, "dur": 1.007, + "args": { + "External id": 473248,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455935581.523, "dur": 30.333, + "args": { + "External id": 473249,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455935622.261, "dur": 1.556, + "args": { + "External id": 473250,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455935629.499, "dur": 16.926, + "args": { + "External id": 473251,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455935651.160, "dur": 1.161, + "args": { + "External id": 473252,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455935656.586, "dur": 13.388, + "args": { + "External id": 473253,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455935674.941, "dur": 1.517, + "args": { + "External id": 473254,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455935681.347, "dur": 13.488, + "args": { + "External id": 473255,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455935699.469, "dur": 0.809, + "args": { + "External id": 473256,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455935706.711, "dur": 11.455, + "args": { + "External id": 473257,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455935723.319, "dur": 1.125, + "args": { + "External id": 473258,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455935728.937, "dur": 12.068, + "args": { + "External id": 473259,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455935745.908, "dur": 3.278, + "args": { + "External id": 473260,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267455935753.451, "dur": 11.768, + "args": { + "External id": 473261,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455935873.787, "dur": 3561.156, + "args": { + "External id": 473262,"Record function id": 0, "Ev Idx": 11437 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2338707, "tid": 2379456, + "ts": 6267455935897.926, "dur": 1333.742, + "args": { + "External id": 473263,"Record function id": 0, "Ev Idx": 11438 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338707, "tid": 2379456, + "ts": 6267455935915.620, "dur": 436.454, + "args": { + "External id": 473264,"Record function id": 0, "Ev Idx": 11439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455936012.556, "dur": 4.793, + "args": { + "External id": 473265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455936020.827, "dur": 0.977, + "args": { + "External id": 473266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455936026.372, "dur": 1.276, + "args": { + "External id": 473267,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455936029.790, "dur": 1.227, + "args": { + "External id": 473268,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455936032.617, "dur": 1.159, + "args": { + "External id": 473269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455936035.636, "dur": 1.375, + "args": { + "External id": 473270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455936040.648, "dur": 0.963, + "args": { + "External id": 473271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455936043.217, "dur": 4.181, + "args": { + "External id": 473272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455936049.276, "dur": 0.651, + "args": { + "External id": 473273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455936051.438, "dur": 1.194, + "args": { + "External id": 473274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455936126.711, "dur": 186.615, + "args": { + "External id": 473275,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455936146.616, "dur": 160.739, + "args": { + "External id": 473276,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455936176.162, "dur": 17.848, + "args": { + "External id": 473277,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455936198.374, "dur": 77.955, + "args": { + "External id": 473278,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455936201.771, "dur": 74.065, + "args": { + "External id": 473279,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936206.596, "dur": 7.170, + "args": { + "External id": 473280,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455936215.714, "dur": 59.387, + "args": { + "External id": 473281,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11456 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2338707, "tid": 2379456, + "ts": 6267455936451.927, "dur": 771.250, + "args": { + "External id": 473282,"Record function id": 0, "Ev Idx": 11457 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338707, "tid": 2379456, + "ts": 6267455936471.424, "dur": 736.276, + "args": { + "External id": 473283,"Record function id": 0, "Ev Idx": 11458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455936544.968, "dur": 6.080, + "args": { + "External id": 473284,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455936569.273, "dur": 54.437, + "args": { + "External id": 473285,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936576.111, "dur": 1.876, + "args": { + "External id": 473286,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936580.078, "dur": 1.030, + "args": { + "External id": 473287,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936583.060, "dur": 3.350, + "args": { + "External id": 473288,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936588.113, "dur": 0.386, + "args": { + "External id": 473289,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936589.907, "dur": 0.565, + "args": { + "External id": 473290,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936611.851, "dur": 0.781, + "args": { + "External id": 473291,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936614.764, "dur": 0.351, + "args": { + "External id": 473292,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936616.299, "dur": 0.609, + "args": { + "External id": 473293,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936619.147, "dur": 0.647, + "args": { + "External id": 473294,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455936637.138, "dur": 52.855, + "args": { + "External id": 473295,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2379456, + "ts": 6267455936728.682, "dur": 141.958, + "args": { + "External id": 473296,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455936741.593, "dur": 4.853, + "args": { + "External id": 473297,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2379456, + "ts": 6267455936756.011, "dur": 13.500, + "args": { + "External id": 473298,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2379456, + "ts": 6267455936760.829, "dur": 8.213, + "args": { + "External id": 473299,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936764.666, "dur": 2.618, + "args": { + "External id": 473300,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2379456, + "ts": 6267455936777.735, "dur": 25.460, + "args": { + "External id": 473301,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936779.998, "dur": 0.599, + "args": { + "External id": 473302,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936782.182, "dur": 0.677, + "args": { + "External id": 473303,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936784.324, "dur": 0.419, + "args": { + "External id": 473304,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936786.245, "dur": 0.658, + "args": { + "External id": 473305,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936788.050, "dur": 0.496, + "args": { + "External id": 473306,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936790.266, "dur": 0.355, + "args": { + "External id": 473307,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936792.101, "dur": 0.419, + "args": { + "External id": 473308,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936793.556, "dur": 2.941, + "args": { + "External id": 473309,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455936798.108, "dur": 0.372, + "args": { + "External id": 473310,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455936823.524, "dur": 35.091, + "args": { + "External id": 473311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267455936927.909, "dur": 184.127, + "args": { + "External id": 473312,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455936959.404, "dur": 147.487, + "args": { + "External id": 473313,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11488, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2379456, + "ts": 6267455936971.109, "dur": 128.690, + "args": { + "External id": 473314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267455937136.481, "dur": 2.847, + "args": { + "External id": 473315,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11490, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455937241.466, "dur": 2168.632, + "args": { + "External id": 473316,"Sequence number": 5036599, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11491 + } + }, + { + "ph": "f", "id": 444, "pid": 2338707, "tid": 2379456, "ts": 6267455937241.466, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455937373.713, "dur": 133.391, + "args": { + "External id": 473317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455937561.634, "dur": 89.365, + "args": { + "External id": 473318,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267455937679.635, "dur": 72.333, + "args": { + "External id": 473319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455937764.643, "dur": 38.283, + "args": { + "External id": 473320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455937810.883, "dur": 41.227, + "args": { + "External id": 473321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455937859.796, "dur": 32.769, + "args": { + "External id": 473322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455937903.225, "dur": 34.673, + "args": { + "External id": 473323,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455937971.666, "dur": 30.276, + "args": { + "External id": 473324,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267455938024.575, "dur": 74.937, + "args": { + "External id": 473325,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455938131.812, "dur": 26.914, + "args": { + "External id": 473326,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455938175.917, "dur": 18.597, + "args": { + "External id": 473327,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455938208.048, "dur": 50.965, + "args": { + "External id": 473328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455938263.477, "dur": 39.939, + "args": { + "External id": 473329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267455938352.031, "dur": 328.671, + "args": { + "External id": 473330,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455938452.388, "dur": 9.240, + "args": { + "External id": 473331,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455938464.434, "dur": 3.493, + "args": { + "External id": 473332,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455938469.493, "dur": 2.769, + "args": { + "External id": 473333,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455938473.632, "dur": 5.433, + "args": { + "External id": 473334,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455938535.635, "dur": 6.222, + "args": { + "External id": 473335,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455938537.891, "dur": 3.701, + "args": { + "External id": 473336,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455938543.818, "dur": 38.341, + "args": { + "External id": 473337,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455938550.098, "dur": 2.329, + "args": { + "External id": 473338,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267455938584.025, "dur": 2.083, + "args": { + "External id": 473339,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455938585.360, "dur": 0.658, + "args": { + "External id": 473340,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267455938587.384, "dur": 38.178, + "args": { + "External id": 473341,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455938589.552, "dur": 0.526, + "args": { + "External id": 473342,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267455938723.450, "dur": 35.651, + "args": { + "External id": 473343,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455938779.088, "dur": 20.282, + "args": { + "External id": 473344,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455938809.745, "dur": 58.605, + "args": { + "External id": 473345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455938875.989, "dur": 50.539, + "args": { + "External id": 473346,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455938939.947, "dur": 27.415, + "args": { + "External id": 473347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455938975.702, "dur": 38.858, + "args": { + "External id": 473348,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455939023.557, "dur": 72.581, + "args": { + "External id": 473349,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267455939109.994, "dur": 43.888, + "args": { + "External id": 473350,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267455939177.529, "dur": 29.587, + "args": { + "External id": 473351,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455939229.648, "dur": 28.490, + "args": { + "External id": 473352,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267455939277.056, "dur": 22.176, + "args": { + "External id": 473353,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267455939313.939, "dur": 23.177, + "args": { + "External id": 473354,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267455939350.103, "dur": 19.676, + "args": { + "External id": 473355,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939461.990, "dur": 18.137, + "args": { + "External id": 473356,"Record function id": 0, "Ev Idx": 11531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939465.955, "dur": 13.082, + "args": { + "External id": 473357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939470.844, "dur": 6.760, + "args": { + "External id": 473358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939472.757, "dur": 4.726, + "args": { + "External id": 473359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939484.807, "dur": 5.639, + "args": { + "External id": 473360,"Record function id": 0, "Ev Idx": 11535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939486.365, "dur": 3.514, + "args": { + "External id": 473361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939487.513, "dur": 1.777, + "args": { + "External id": 473362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939488.336, "dur": 0.835, + "args": { + "External id": 473363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939494.350, "dur": 4.850, + "args": { + "External id": 473364,"Record function id": 0, "Ev Idx": 11539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939495.874, "dur": 2.837, + "args": { + "External id": 473365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939496.709, "dur": 1.483, + "args": { + "External id": 473366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939497.350, "dur": 0.767, + "args": { + "External id": 473367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939503.077, "dur": 4.801, + "args": { + "External id": 473368,"Record function id": 0, "Ev Idx": 11543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939504.309, "dur": 3.056, + "args": { + "External id": 473369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939504.860, "dur": 1.963, + "args": { + "External id": 473370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939506.044, "dur": 0.679, + "args": { + "External id": 473371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939511.600, "dur": 4.229, + "args": { + "External id": 473372,"Record function id": 0, "Ev Idx": 11547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939512.929, "dur": 2.432, + "args": { + "External id": 473373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939513.517, "dur": 1.170, + "args": { + "External id": 473374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939513.870, "dur": 0.742, + "args": { + "External id": 473375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939519.571, "dur": 7.139, + "args": { + "External id": 473376,"Record function id": 0, "Ev Idx": 11551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939521.081, "dur": 5.109, + "args": { + "External id": 473377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939521.723, "dur": 3.915, + "args": { + "External id": 473378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939522.123, "dur": 3.428, + "args": { + "External id": 473379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939530.789, "dur": 5.351, + "args": { + "External id": 473380,"Record function id": 0, "Ev Idx": 11555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939532.167, "dur": 3.483, + "args": { + "External id": 473381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939532.777, "dur": 2.347, + "args": { + "External id": 473382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939534.170, "dur": 0.816, + "args": { + "External id": 473383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939540.667, "dur": 4.000, + "args": { + "External id": 473384,"Record function id": 0, "Ev Idx": 11559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939541.777, "dur": 2.398, + "args": { + "External id": 473385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939542.326, "dur": 1.246, + "args": { + "External id": 473386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939542.828, "dur": 0.657, + "args": { + "External id": 473387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939548.997, "dur": 4.959, + "args": { + "External id": 473388,"Record function id": 0, "Ev Idx": 11563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267455939550.790, "dur": 2.697, + "args": { + "External id": 473389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939551.297, "dur": 1.465, + "args": { + "External id": 473390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267455939551.812, "dur": 0.810, + "args": { + "External id": 473391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455939559.200, "dur": 62958.330, + "args": { + "External id": 473392,"Record function id": 0, "Sequence number": 5036598, "Fwd thread id": 1, "Ev Idx": 11567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267455939560.947, "dur": 62945.629, + "args": { + "External id": 473393,"Sequence number": 5036598, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11568 + } + }, + { + "ph": "f", "id": 445, "pid": 2338707, "tid": 2379456, "ts": 6267455939560.947, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2338707, "tid": 2379456, + "ts": 6267455939614.613, "dur": 46.882, + "args": { + "External id": 473394,"Record function id": 0, "Ev Idx": 11569 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2338707, "tid": 2379456, + "ts": 6267455939671.126, "dur": 77.917, + "args": { + "External id": 473395,"Record function id": 0, "Ev Idx": 11570 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2338707, "tid": 2379456, + "ts": 6267455939755.849, "dur": 62740.914, + "args": { + "External id": 473396,"Record function id": 0, "Ev Idx": 11571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455939863.974, "dur": 7.912, + "args": { + "External id": 473397,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267455939884.703, "dur": 5.918, + "args": { + "External id": 473398,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455939908.296, "dur": 61439.407, + "args": { + "External id": 473399,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267455939924.578, "dur": 61406.122, + "args": { + "External id": 473400,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267455940112.386, "dur": 28.437, + "args": { + "External id": 473401,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267455940172.494, "dur": 61102.554, + "args": { + "External id": 473402,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267455940175.889, "dur": 61097.222, + "args": { + "External id": 473403,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267455940182.044, "dur": 14.971, + "args": { + "External id": 473404,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267455940199.598, "dur": 61070.438, + "args": { + "External id": 473405,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456001488.281, "dur": 16.584, + "args": { + "External id": 473406,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456001493.127, "dur": 11.269, + "args": { + "External id": 473407,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267456001546.009, "dur": 456.266, + "args": { + "External id": 473408,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267456001585.843, "dur": 409.640, + "args": { + "External id": 473409,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11584, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267456001615.673, "dur": 372.802, + "args": { + "External id": 473410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267456002031.453, "dur": 2.886, + "args": { + "External id": 473411,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11586, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456002150.524, "dur": 8.496, + "args": { + "External id": 473412,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456002173.264, "dur": 44.270, + "args": { + "External id": 473413,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456002229.526, "dur": 1.824, + "args": { + "External id": 473414,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456002244.961, "dur": 15.150, + "args": { + "External id": 473415,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456002266.518, "dur": 1.248, + "args": { + "External id": 473416,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456002272.988, "dur": 12.048, + "args": { + "External id": 473417,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456002292.772, "dur": 3.932, + "args": { + "External id": 473418,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456002301.091, "dur": 12.466, + "args": { + "External id": 473419,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456002318.736, "dur": 0.970, + "args": { + "External id": 473420,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456002324.647, "dur": 11.399, + "args": { + "External id": 473421,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456002340.856, "dur": 1.142, + "args": { + "External id": 473422,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456002345.802, "dur": 11.679, + "args": { + "External id": 473423,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456002364.440, "dur": 1.040, + "args": { + "External id": 473424,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456002370.061, "dur": 11.789, + "args": { + "External id": 473425,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456002386.841, "dur": 1.340, + "args": { + "External id": 473426,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456002392.935, "dur": 11.654, + "args": { + "External id": 473427,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456002409.793, "dur": 1.055, + "args": { + "External id": 473428,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456002415.084, "dur": 11.029, + "args": { + "External id": 473429,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267456002535.994, "dur": 2781.268, + "args": { + "External id": 473430,"Record function id": 0, "Ev Idx": 11605 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2338707, "tid": 2379456, + "ts": 6267456002559.629, "dur": 602.632, + "args": { + "External id": 473431,"Record function id": 0, "Ev Idx": 11606 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338707, "tid": 2379456, + "ts": 6267456002577.222, "dur": 424.127, + "args": { + "External id": 473432,"Record function id": 0, "Ev Idx": 11607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456002695.576, "dur": 5.893, + "args": { + "External id": 473433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456002705.410, "dur": 0.986, + "args": { + "External id": 473434,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456002709.095, "dur": 3.194, + "args": { + "External id": 473435,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456002714.032, "dur": 1.264, + "args": { + "External id": 473436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456002716.943, "dur": 1.135, + "args": { + "External id": 473437,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456002721.531, "dur": 0.904, + "args": { + "External id": 473438,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456002724.148, "dur": 0.882, + "args": { + "External id": 473439,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456002726.411, "dur": 2.466, + "args": { + "External id": 473440,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456002730.903, "dur": 0.949, + "args": { + "External id": 473441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456002735.030, "dur": 1.006, + "args": { + "External id": 473442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267456002757.423, "dur": 201.916, + "args": { + "External id": 473443,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267456002776.756, "dur": 176.383, + "args": { + "External id": 473444,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456002809.694, "dur": 20.463, + "args": { + "External id": 473445,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267456002834.941, "dur": 86.836, + "args": { + "External id": 473446,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267456002838.255, "dur": 83.091, + "args": { + "External id": 473447,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456002847.371, "dur": 7.141, + "args": { + "External id": 473448,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267456002856.388, "dur": 63.970, + "args": { + "External id": 473449,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267456003173.901, "dur": 2116.905, + "args": { + "External id": 473450,"Sequence number": 5036597, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11625 + } + }, + { + "ph": "f", "id": 446, "pid": 2338707, "tid": 2379456, "ts": 6267456003173.901, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456003305.237, "dur": 133.435, + "args": { + "External id": 473451,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338707, "tid": 2379456, + "ts": 6267456003497.135, "dur": 46.047, + "args": { + "External id": 473452,"kernel_hash": "cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/u7/cu73slalpcimolzdcsth5yc2bffweqdrn5mjolw7iwdski5pzujs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338707, "tid": 2379456, + "ts": 6267456003565.661, "dur": 85.158, + "args": { + "External id": 473453,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456003671.861, "dur": 42.968, + "args": { + "External id": 473454,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456003724.265, "dur": 41.836, + "args": { + "External id": 473455,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456003776.033, "dur": 33.540, + "args": { + "External id": 473456,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456003821.809, "dur": 36.518, + "args": { + "External id": 473457,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338707, "tid": 2379456, + "ts": 6267456003888.024, "dur": 29.592, + "args": { + "External id": 473458,"kernel_hash": "cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/wm/cwmdtyuwgj7j3fxg57ckso72wqi2qzdp23gsirbcv7772zguon5s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338707, "tid": 2379456, + "ts": 6267456003937.915, "dur": 31.668, + "args": { + "External id": 473459,"kernel_hash": "cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqtkq7myps3bnwtywqmkcfinhjn2cq7yyi2rklvdi3lxtis2knql.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267456003993.574, "dur": 22.643, + "args": { + "External id": 473460,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267456004031.094, "dur": 16.256, + "args": { + "External id": 473461,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456004099.832, "dur": 53.106, + "args": { + "External id": 473462,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456004157.785, "dur": 38.983, + "args": { + "External id": 473463,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338707, "tid": 2379456, + "ts": 6267456004233.671, "dur": 291.963, + "args": { + "External id": 473464,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456004325.460, "dur": 7.975, + "args": { + "External id": 473465,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456004335.990, "dur": 3.567, + "args": { + "External id": 473466,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456004341.136, "dur": 2.679, + "args": { + "External id": 473467,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456004345.340, "dur": 4.951, + "args": { + "External id": 473468,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267456004398.409, "dur": 6.105, + "args": { + "External id": 473469,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456004400.699, "dur": 3.624, + "args": { + "External id": 473470,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267456004406.611, "dur": 40.650, + "args": { + "External id": 473471,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456004412.917, "dur": 1.983, + "args": { + "External id": 473472,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2379456, + "ts": 6267456004449.350, "dur": 2.207, + "args": { + "External id": 473473,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456004450.589, "dur": 0.880, + "args": { + "External id": 473474,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2379456, + "ts": 6267456004453.136, "dur": 16.996, + "args": { + "External id": 473475,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456004456.107, "dur": 0.699, + "args": { + "External id": 473476,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2379456, + "ts": 6267456004569.557, "dur": 53.581, + "args": { + "External id": 473477,"kernel_hash": "cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/yk/cyklfqyq3ruewgyv73sfwklyujhcs5pkt5uoemzhq5k47tccech4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2379456, + "ts": 6267456004646.316, "dur": 20.175, + "args": { + "External id": 473478,"kernel_hash": "ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/eu/ceuzvztw4yvdvos35uwgo5c2k6tiuymkspbfjem2pq5pqcfmcgjg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456004680.892, "dur": 54.011, + "args": { + "External id": 473479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456004746.361, "dur": 45.122, + "args": { + "External id": 473480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456004803.958, "dur": 24.413, + "args": { + "External id": 473481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456004835.792, "dur": 37.163, + "args": { + "External id": 473482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456004881.760, "dur": 32.607, + "args": { + "External id": 473483,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2379456, + "ts": 6267456004923.064, "dur": 37.852, + "args": { + "External id": 473484,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338707, "tid": 2379456, + "ts": 6267456005001.457, "dur": 34.896, + "args": { + "External id": 473485,"kernel_hash": "c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/6w/c6w57dwqlavni5blikci5m4csat5hnlhey5taj7vfkseu26zork7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338707, "tid": 2379456, + "ts": 6267456005092.641, "dur": 35.144, + "args": { + "External id": 473486,"kernel_hash": "ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/kj/ckjppy65bf3dvikk7c5b6blgzdmfgaljxyfbqmudoshkdutvtokx.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338707, "tid": 2379456, + "ts": 6267456005146.874, "dur": 24.278, + "args": { + "External id": 473487,"kernel_hash": "c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/4m/c4mjgau2yanfqumxpspcb2s3xxxwzlinzlyupgfzaooqh5zvt5h5.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338707, "tid": 2379456, + "ts": 6267456005189.768, "dur": 18.248, + "args": { + "External id": 473488,"kernel_hash": "cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/y5/cy5n6jvcqjg4gnbvuoozi4zmjzoyes5m2wtuaitwk3cpetqueido.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338707, "tid": 2379456, + "ts": 6267456005226.044, "dur": 21.053, + "args": { + "External id": 473489,"kernel_hash": "cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/eb/cebp6425kwg6qjs7vc5dpxqx2tgnwzzcolkzg6q26v5rzluozz5b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005345.143, "dur": 18.777, + "args": { + "External id": 473490,"Record function id": 0, "Ev Idx": 11665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005349.131, "dur": 13.597, + "args": { + "External id": 473491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005354.311, "dur": 7.263, + "args": { + "External id": 473492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005356.502, "dur": 4.891, + "args": { + "External id": 473493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005368.723, "dur": 6.573, + "args": { + "External id": 473494,"Record function id": 0, "Ev Idx": 11669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005371.153, "dur": 3.609, + "args": { + "External id": 473495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005372.193, "dur": 1.907, + "args": { + "External id": 473496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005373.025, "dur": 0.951, + "args": { + "External id": 473497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005379.213, "dur": 6.045, + "args": { + "External id": 473498,"Record function id": 0, "Ev Idx": 11673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005381.434, "dur": 3.273, + "args": { + "External id": 473499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005382.306, "dur": 1.781, + "args": { + "External id": 473500,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005382.842, "dur": 1.149, + "args": { + "External id": 473501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005389.009, "dur": 6.019, + "args": { + "External id": 473502,"Record function id": 0, "Ev Idx": 11677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005391.352, "dur": 3.113, + "args": { + "External id": 473503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005392.135, "dur": 1.863, + "args": { + "External id": 473504,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005393.137, "dur": 0.744, + "args": { + "External id": 473505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005398.756, "dur": 4.935, + "args": { + "External id": 473506,"Record function id": 0, "Ev Idx": 11681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005400.554, "dur": 2.638, + "args": { + "External id": 473507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005401.259, "dur": 1.279, + "args": { + "External id": 473508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005401.625, "dur": 0.836, + "args": { + "External id": 473509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005407.711, "dur": 4.426, + "args": { + "External id": 473510,"Record function id": 0, "Ev Idx": 11685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005409.203, "dur": 2.441, + "args": { + "External id": 473511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005409.896, "dur": 1.190, + "args": { + "External id": 473512,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005410.238, "dur": 0.740, + "args": { + "External id": 473513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005415.804, "dur": 8.322, + "args": { + "External id": 473514,"Record function id": 0, "Ev Idx": 11689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005417.522, "dur": 6.109, + "args": { + "External id": 473515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005418.195, "dur": 4.946, + "args": { + "External id": 473516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005419.022, "dur": 3.995, + "args": { + "External id": 473517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005427.885, "dur": 5.458, + "args": { + "External id": 473518,"Record function id": 0, "Ev Idx": 11693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005430.128, "dur": 2.718, + "args": { + "External id": 473519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005430.794, "dur": 1.597, + "args": { + "External id": 473520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005431.288, "dur": 1.026, + "args": { + "External id": 473521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005437.223, "dur": 4.718, + "args": { + "External id": 473522,"Record function id": 0, "Ev Idx": 11697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456005438.763, "dur": 2.634, + "args": { + "External id": 473523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005439.311, "dur": 1.636, + "args": { + "External id": 473524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456005439.827, "dur": 1.037, + "args": { + "External id": 473525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267456005446.677, "dur": 64452.631, + "args": { + "External id": 473526,"Record function id": 0, "Sequence number": 5036596, "Fwd thread id": 1, "Ev Idx": 11701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267456005448.520, "dur": 64440.149, + "args": { + "External id": 473527,"Sequence number": 5036596, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11702 + } + }, + { + "ph": "f", "id": 447, "pid": 2338707, "tid": 2379456, "ts": 6267456005448.520, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2338707, "tid": 2379456, + "ts": 6267456005482.845, "dur": 45.290, + "args": { + "External id": 473528,"Record function id": 0, "Ev Idx": 11703 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2338707, "tid": 2379456, + "ts": 6267456005537.552, "dur": 106.600, + "args": { + "External id": 473529,"Record function id": 0, "Ev Idx": 11704 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2338707, "tid": 2379456, + "ts": 6267456005653.423, "dur": 64225.408, + "args": { + "External id": 473530,"Record function id": 0, "Ev Idx": 11705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456005763.527, "dur": 8.701, + "args": { + "External id": 473531,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456005784.675, "dur": 5.802, + "args": { + "External id": 473532,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267456005809.085, "dur": 62924.330, + "args": { + "External id": 473533,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267456005826.777, "dur": 62891.085, + "args": { + "External id": 473534,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456005944.030, "dur": 21.857, + "args": { + "External id": 473535,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267456005989.838, "dur": 62669.374, + "args": { + "External id": 473536,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267456005993.260, "dur": 62664.690, + "args": { + "External id": 473537,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456005999.166, "dur": 10.938, + "args": { + "External id": 473538,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267456006012.483, "dur": 62638.955, + "args": { + "External id": 473539,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456068867.197, "dur": 13.988, + "args": { + "External id": 473540,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456068871.580, "dur": 9.082, + "args": { + "External id": 473541,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267456068923.355, "dur": 473.966, + "args": { + "External id": 473542,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267456068958.076, "dur": 432.029, + "args": { + "External id": 473543,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11718, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267456068974.257, "dur": 408.243, + "args": { + "External id": 473544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267456069425.100, "dur": 2.859, + "args": { + "External id": 473545,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11720, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456069503.371, "dur": 8.060, + "args": { + "External id": 473546,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456069525.351, "dur": 40.544, + "args": { + "External id": 473547,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456069579.094, "dur": 1.968, + "args": { + "External id": 473548,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456069587.328, "dur": 34.453, + "args": { + "External id": 473549,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456069632.606, "dur": 1.963, + "args": { + "External id": 473550,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456069639.911, "dur": 13.854, + "args": { + "External id": 473551,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456069661.426, "dur": 3.292, + "args": { + "External id": 473552,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456069669.213, "dur": 13.273, + "args": { + "External id": 473553,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456069687.974, "dur": 1.059, + "args": { + "External id": 473554,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456069694.024, "dur": 11.387, + "args": { + "External id": 473555,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456069710.714, "dur": 1.535, + "args": { + "External id": 473556,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456069716.333, "dur": 14.629, + "args": { + "External id": 473557,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456069736.527, "dur": 1.226, + "args": { + "External id": 473558,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456069741.630, "dur": 12.518, + "args": { + "External id": 473559,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456069759.252, "dur": 1.160, + "args": { + "External id": 473560,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456069767.015, "dur": 14.722, + "args": { + "External id": 473561,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456069787.336, "dur": 1.176, + "args": { + "External id": 473562,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456069793.285, "dur": 12.850, + "args": { + "External id": 473563,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267456069919.172, "dur": 371.221, + "args": { + "External id": 473564,"Record function id": 0, "Sequence number": 5036595, "Fwd thread id": 1, "Ev Idx": 11739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338707, "tid": 2379456, + "ts": 6267456069922.467, "dur": 356.854, + "args": { + "External id": 473565,"Sequence number": 5036595, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11740 + } + }, + { + "ph": "f", "id": 448, "pid": 2338707, "tid": 2379456, "ts": 6267456069922.467, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2338707, "tid": 2379456, + "ts": 6267456070104.546, "dur": 50.371, + "args": { + "External id": 473566,"kernel_hash": "cylhmuu3q4lld7ngapzsx2q3nsghgiylz3hysdrztph5rn33kr3r", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/yl/cylhmuu3q4lld7ngapzsx2q3nsghgiylz3hysdrztph5rn33kr3r.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 11741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2338707, "tid": 2379456, + "ts": 6267456070175.345, "dur": 32.552, + "args": { + "External id": 473567,"kernel_hash": "cqovkpvfazf4uawh526mshntxf6zr2towc3gbwazqnfjbhqiu3u5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqovkpvfazf4uawh526mshntxf6zr2towc3gbwazqnfjbhqiu3u5.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096, 4096], [32000, 4096], []], "Ev Idx": 11742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2338707, "tid": 2379456, + "ts": 6267456070232.644, "dur": 27.032, + "args": { + "External id": 473568,"kernel_hash": "cxsh2jiwiopwz5eatuiwkxhwp4pcrm5kca3w7qyj3ojcu3sw473n", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/xs/cxsh2jiwiopwz5eatuiwkxhwp4pcrm5kca3w7qyj3ojcu3sw473n.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 11743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456070304.867, "dur": 19.260, + "args": { + "External id": 473569,"Record function id": 0, "Ev Idx": 11744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338707, "tid": 2379456, + "ts": 6267456070308.211, "dur": 14.840, + "args": { + "External id": 473570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456070312.838, "dur": 9.218, + "args": { + "External id": 473571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2379456, + "ts": 6267456070314.843, "dur": 7.060, + "args": { + "External id": 473572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11747 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2338707, "tid": 2379456, + "ts": 6267456070352.189, "dur": 14812.816, + "args": { + "External id": 473573,"Record function id": 0, "Ev Idx": 11748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2338707, "tid": 2379456, + "ts": 6267456070374.587, "dur": 46.021, + "args": { + "External id": 473574,"Record function id": 0, "Ev Idx": 11749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2338707, "tid": 2379456, + "ts": 6267456070428.519, "dur": 293.247, + "args": { + "External id": 473575,"Record function id": 0, "Ev Idx": 11750 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2338707, "tid": 2379456, + "ts": 6267456070731.084, "dur": 14110.771, + "args": { + "External id": 473576,"Record function id": 0, "Ev Idx": 11751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456070885.154, "dur": 9.370, + "args": { + "External id": 473577,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2379456, + "ts": 6267456070907.690, "dur": 9.588, + "args": { + "External id": 473578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 11753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267456070943.422, "dur": 11916.761, + "args": { + "External id": 473579,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 11754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338707, "tid": 2379456, + "ts": 6267456070975.661, "dur": 11867.350, + "args": { + "External id": 473580,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 11755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456071883.177, "dur": 28.940, + "args": { + "External id": 473581,"Record function id": 0, "Concrete Inputs": ["[277237]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2379456, + "ts": 6267456072285.290, "dur": 10494.126, + "args": { + "External id": 473582,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], [], []], "Ev Idx": 11757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2379456, + "ts": 6267456072290.278, "dur": 10487.568, + "args": { + "External id": 473583,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], []], "Ev Idx": 11758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456072299.458, "dur": 18.931, + "args": { + "External id": 473584,"Record function id": 0, "Concrete Inputs": ["[277237]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2379456, + "ts": 6267456072321.858, "dur": 10447.757, + "args": { + "External id": 473585,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[277237], [277237], []], "Ev Idx": 11760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456083031.331, "dur": 15.560, + "args": { + "External id": 473586,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1134596096], [], [], [], [], []], "Ev Idx": 11761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2379456, + "ts": 6267456083036.855, "dur": 9.567, + "args": { + "External id": 473587,"Record function id": 0, "Concrete Inputs": ["[141824512]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338707, "tid": 2379456, + "ts": 6267456083120.259, "dur": 425.866, + "args": { + "External id": 473588,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[141824512], [1134596096], [], [], [], []], "Ev Idx": 11763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267456083150.743, "dur": 389.118, + "args": { + "External id": 473589,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 141824512, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1134596096], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11764, "In msg nelems": 1134596096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338707, "tid": 2379456, + "ts": 6267456083167.112, "dur": 366.610, + "args": { + "External id": 473590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1134596096]], "Ev Idx": 11765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2379456, + "ts": 6267456083567.842, "dur": 2.963, + "args": { + "External id": 473591,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11766, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456083661.591, "dur": 8.697, + "args": { + "External id": 473592,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456083684.238, "dur": 41.249, + "args": { + "External id": 473593,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4000, 4096], [4000, 4096], []], "Ev Idx": 11768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456083736.934, "dur": 1.767, + "args": { + "External id": 473594,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456083745.513, "dur": 15.983, + "args": { + "External id": 473595,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456083767.773, "dur": 1.604, + "args": { + "External id": 473596,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "16384512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456083774.911, "dur": 13.301, + "args": { + "External id": 473597,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456083795.005, "dur": 1.294, + "args": { + "External id": 473598,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "18481664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456083801.174, "dur": 12.985, + "args": { + "External id": 473599,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456083819.820, "dur": 1.032, + "args": { + "External id": 473600,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "19005952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456083861.153, "dur": 13.369, + "args": { + "External id": 473601,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456083881.096, "dur": 3.570, + "args": { + "External id": 473602,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "19530240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456083890.521, "dur": 12.372, + "args": { + "External id": 473603,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456083908.247, "dur": 0.920, + "args": { + "External id": 473604,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456083914.249, "dur": 13.283, + "args": { + "External id": 473605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456083932.886, "dur": 0.919, + "args": { + "External id": 473606,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "21627904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456083938.204, "dur": 13.166, + "args": { + "External id": 473607,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456083956.777, "dur": 0.954, + "args": { + "External id": 473608,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "28967936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456083962.296, "dur": 11.740, + "args": { + "External id": 473609,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456083981.990, "dur": 0.722, + "args": { + "External id": 473610,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "36307968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456083988.022, "dur": 14.339, + "args": { + "External id": 473611,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084008.002, "dur": 1.174, + "args": { + "External id": 473612,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084013.776, "dur": 13.012, + "args": { + "External id": 473613,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084032.298, "dur": 0.959, + "args": { + "External id": 473614,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "43648512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084037.891, "dur": 48.425, + "args": { + "External id": 473615,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084097.208, "dur": 1.795, + "args": { + "External id": 473616,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "45745664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084104.900, "dur": 15.823, + "args": { + "External id": 473617,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084126.583, "dur": 3.753, + "args": { + "External id": 473618,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "46269952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084135.334, "dur": 14.261, + "args": { + "External id": 473619,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084155.175, "dur": 1.081, + "args": { + "External id": 473620,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "46794240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084162.989, "dur": 13.337, + "args": { + "External id": 473621,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084182.297, "dur": 0.959, + "args": { + "External id": 473622,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084187.818, "dur": 14.002, + "args": { + "External id": 473623,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084207.441, "dur": 1.258, + "args": { + "External id": 473624,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "48891904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084213.092, "dur": 11.304, + "args": { + "External id": 473625,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084229.707, "dur": 0.965, + "args": { + "External id": 473626,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "56231936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084235.068, "dur": 11.343, + "args": { + "External id": 473627,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084253.392, "dur": 1.304, + "args": { + "External id": 473628,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "63571968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084259.381, "dur": 11.429, + "args": { + "External id": 473629,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084276.618, "dur": 0.871, + "args": { + "External id": 473630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084281.746, "dur": 11.966, + "args": { + "External id": 473631,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084299.039, "dur": 0.997, + "args": { + "External id": 473632,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "70912512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084304.850, "dur": 11.237, + "args": { + "External id": 473633,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084321.192, "dur": 3.027, + "args": { + "External id": 473634,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73009664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084328.988, "dur": 11.559, + "args": { + "External id": 473635,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084345.637, "dur": 0.810, + "args": { + "External id": 473636,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73533952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084351.113, "dur": 10.770, + "args": { + "External id": 473637,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084367.152, "dur": 0.902, + "args": { + "External id": 473638,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "74058240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084372.937, "dur": 20.415, + "args": { + "External id": 473639,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084398.246, "dur": 0.926, + "args": { + "External id": 473640,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084403.508, "dur": 11.131, + "args": { + "External id": 473641,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084419.274, "dur": 0.977, + "args": { + "External id": 473642,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "76155904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084425.002, "dur": 11.824, + "args": { + "External id": 473643,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084441.875, "dur": 0.798, + "args": { + "External id": 473644,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "83495936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084448.468, "dur": 11.121, + "args": { + "External id": 473645,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084464.495, "dur": 0.849, + "args": { + "External id": 473646,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "90835968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084470.527, "dur": 11.937, + "args": { + "External id": 473647,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084487.137, "dur": 0.724, + "args": { + "External id": 473648,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084492.111, "dur": 11.737, + "args": { + "External id": 473649,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084508.473, "dur": 3.141, + "args": { + "External id": 473650,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "98176512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084516.437, "dur": 11.764, + "args": { + "External id": 473651,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084534.904, "dur": 0.838, + "args": { + "External id": 473652,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100273664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084540.325, "dur": 11.101, + "args": { + "External id": 473653,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084556.262, "dur": 0.718, + "args": { + "External id": 473654,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100797952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084561.546, "dur": 14.034, + "args": { + "External id": 473655,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084580.499, "dur": 0.717, + "args": { + "External id": 473656,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "101322240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084586.951, "dur": 27.694, + "args": { + "External id": 473657,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084623.759, "dur": 1.671, + "args": { + "External id": 473658,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084631.273, "dur": 15.411, + "args": { + "External id": 473659,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084651.784, "dur": 1.128, + "args": { + "External id": 473660,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "103419904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084657.860, "dur": 12.437, + "args": { + "External id": 473661,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084675.797, "dur": 0.910, + "args": { + "External id": 473662,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "110759936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084681.841, "dur": 13.330, + "args": { + "External id": 473663,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084699.993, "dur": 1.059, + "args": { + "External id": 473664,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "118099968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084705.934, "dur": 12.990, + "args": { + "External id": 473665,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084724.277, "dur": 2.950, + "args": { + "External id": 473666,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084731.522, "dur": 13.056, + "args": { + "External id": 473667,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2379456, + "ts": 6267456084749.408, "dur": 0.853, + "args": { + "External id": 473668,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "125440512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2379456, + "ts": 6267456084754.661, "dur": 12.839, + "args": { + "External id": 473669,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4000, 4096], [4000, 4096], []], "Ev Idx": 11844 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "ProfilerStep#10751", "pid": 2338707, "tid": 2338707, + "ts": 6267449557394.764, "dur": 6556897.271, + "args": { + "External id": 452609,"Record function id": 0, "Ev Idx": 11845 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.zero_grad#AdamW.zero_grad", "pid": 2338707, "tid": 2338707, + "ts": 6267449557427.824, "dur": 825.464, + "args": { + "External id": 452610,"Record function id": 0, "Ev Idx": 11846 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2338707, "tid": 2338707, + "ts": 6267449558305.009, "dur": 2187.269, + "args": { + "External id": 452611,"Record function id": 0, "Ev Idx": 11847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449559390.201, "dur": 8.267, + "args": { + "External id": 452612,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338707, "tid": 2338707, + "ts": 6267449559421.470, "dur": 7.279, + "args": { + "External id": 452613,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 11849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449559898.229, "dur": 2.549, + "args": { + "External id": 452614,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338707, "tid": 2338707, + "ts": 6267449559910.807, "dur": 2.503, + "args": { + "External id": 452615,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 11851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449560382.676, "dur": 1.933, + "args": { + "External id": 452616,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338707, "tid": 2338707, + "ts": 6267449560389.619, "dur": 2.250, + "args": { + "External id": 452617,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 11853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449560916.141, "dur": 16.704, + "args": { + "External id": 452618,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 11854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449560926.692, "dur": 2.316, + "args": { + "External id": 452619,"Record function id": 0, "Concrete Inputs": ["", "[8, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 11855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449560934.544, "dur": 4.304, + "args": { + "External id": 452620,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 11856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449560936.461, "dur": 1.032, + "args": { + "External id": 452621,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 11857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449560967.258, "dur": 2878.876, + "args": { + "External id": 452622,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], [], []], "Ev Idx": 11858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449560974.705, "dur": 2870.670, + "args": { + "External id": 452623,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 11859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449560986.482, "dur": 10.425, + "args": { + "External id": 452624,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449560999.220, "dur": 2844.916, + "args": { + "External id": 452625,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449561009.218, "dur": 0.613, + "args": { + "External id": 452626,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 11862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449561012.503, "dur": 5.768, + "args": { + "External id": 452627,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[8, 4096], [8, 4096]], "Ev Idx": 11863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338707, "tid": 2338707, + "ts": 6267449561014.734, "dur": 3.384, + "args": { + "External id": 452628,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[8, 4096], [], []], "Ev Idx": 11864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449561017.424, "dur": 0.456, + "args": { + "External id": 452629,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267449561020.109, "dur": 188.496, + "args": { + "External id": 452630,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 11866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267449561022.727, "dur": 185.521, + "args": { + "External id": 452631,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 11867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449561024.858, "dur": 18.533, + "args": { + "External id": 452632,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 11868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449561029.244, "dur": 13.529, + "args": { + "External id": 452633,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449561044.264, "dur": 163.215, + "args": { + "External id": 452634,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449561211.118, "dur": 2629.930, + "args": { + "External id": 452635,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449563861.690, "dur": 393.313, + "args": { + "External id": 452636,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 11872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449563863.571, "dur": 390.829, + "args": { + "External id": 452637,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], []], "Ev Idx": 11873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449563870.530, "dur": 9.285, + "args": { + "External id": 452638,"Record function id": 0, "Concrete Inputs": ["[8, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449563883.923, "dur": 367.869, + "args": { + "External id": 452639,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[8, 8192], [8, 8192], []], "Ev Idx": 11875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338707, "tid": 2338707, + "ts": 6267449564281.645, "dur": 54.410, + "args": { + "External id": 452640,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449564286.354, "dur": 5.697, + "args": { + "External id": 452641,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338707, "tid": 2338707, + "ts": 6267449564295.312, "dur": 40.289, + "args": { + "External id": 452642,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 11878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267449564299.787, "dur": 8.882, + "args": { + "External id": 452643,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 11879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2338707, "tid": 2338707, + "ts": 6267449564347.543, "dur": 72.869, + "args": { + "External id": 452644,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338707, "tid": 2338707, + "ts": 6267449564354.942, "dur": 6.864, + "args": { + "External id": 452645,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 11881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449564359.613, "dur": 1.896, + "args": { + "External id": 452646,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 11882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449564362.834, "dur": 3.682, + "args": { + "External id": 452647,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2338707, + "ts": 6267449564368.653, "dur": 3.562, + "args": { + "External id": 452648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[8, 4096]], "Ev Idx": 11884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338707, "tid": 2338707, + "ts": 6267449564374.860, "dur": 5.548, + "args": { + "External id": 452649,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449564379.707, "dur": 0.584, + "args": { + "External id": 452650,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338707, "tid": 2338707, + "ts": 6267449564381.130, "dur": 4.530, + "args": { + "External id": 452651,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 11887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449564384.677, "dur": 0.903, + "args": { + "External id": 452652,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 11888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449564387.255, "dur": 5.550, + "args": { + "External id": 452653,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [8, 1, 1, 4096]], "Ev Idx": 11889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338707, "tid": 2338707, + "ts": 6267449564390.620, "dur": 2.072, + "args": { + "External id": 452654,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 11890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449564391.862, "dur": 0.753, + "args": { + "External id": 452655,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 11891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449564393.694, "dur": 25.835, + "args": { + "External id": 452656,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[8, 1, 1, 4096], [8, 1, 1, 4096], []], "Ev Idx": 11892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449564428.848, "dur": 29.436, + "args": { + "External id": 452657,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 11893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449564430.869, "dur": 27.196, + "args": { + "External id": 452658,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 11894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449564436.170, "dur": 3.219, + "args": { + "External id": 452659,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449564440.246, "dur": 17.381, + "args": { + "External id": 452660,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11896 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449564576.245, "dur": 183.420, + "args": { + "External id": 452661,"Record function id": 0, "Ev Idx": 11897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2338707, "tid": 2338707, + "ts": 6267449564690.591, "dur": 57.831, + "args": { + "External id": 452662,"Record function id": 0, "Ev Idx": 11898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449564767.202, "dur": 46.573, + "args": { + "External id": 452663,"Record function id": 0, "Ev Idx": 11899 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449564822.376, "dur": 14008.194, + "args": { + "External id": 452664,"Record function id": 0, "Ev Idx": 11900 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2338707, "tid": 2338707, + "ts": 6267449564833.136, "dur": 1482.777, + "args": { + "External id": 452665,"Record function id": 0, "Ev Idx": 11901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449564958.701, "dur": 8.190, + "args": { + "External id": 452666,"Record function id": 0, "Concrete Inputs": ["[141824512]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449564986.506, "dur": 180.510, + "args": { + "External id": 452667,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 11903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449564991.590, "dur": 1.729, + "args": { + "External id": 452668,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449564999.613, "dur": 0.456, + "args": { + "External id": 452669,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565001.036, "dur": 0.349, + "args": { + "External id": 452670,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "16384512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565002.192, "dur": 2.994, + "args": { + "External id": 452671,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "18481664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565010.173, "dur": 0.247, + "args": { + "External id": 452672,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19005952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565011.236, "dur": 0.475, + "args": { + "External id": 452673,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "19530240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565012.412, "dur": 2.333, + "args": { + "External id": 452674,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565018.323, "dur": 0.535, + "args": { + "External id": 452675,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "21627904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565019.643, "dur": 0.286, + "args": { + "External id": 452676,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "28967936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565024.732, "dur": 0.331, + "args": { + "External id": 452677,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "36307968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565025.720, "dur": 0.371, + "args": { + "External id": 452678,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565026.886, "dur": 2.441, + "args": { + "External id": 452679,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "43648512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565034.702, "dur": 0.409, + "args": { + "External id": 452680,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45745664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565036.082, "dur": 0.458, + "args": { + "External id": 452681,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46269952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565037.199, "dur": 2.406, + "args": { + "External id": 452682,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "46794240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565042.523, "dur": 0.428, + "args": { + "External id": 452683,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565043.419, "dur": 0.429, + "args": { + "External id": 452684,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "48891904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565049.067, "dur": 0.401, + "args": { + "External id": 452685,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "56231936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565050.189, "dur": 0.389, + "args": { + "External id": 452686,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "63571968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565051.529, "dur": 41.653, + "args": { + "External id": 452687,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565099.555, "dur": 0.386, + "args": { + "External id": 452688,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "70912512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565100.671, "dur": 0.311, + "args": { + "External id": 452689,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73009664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565101.683, "dur": 2.660, + "args": { + "External id": 452690,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73533952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565107.553, "dur": 0.499, + "args": { + "External id": 452691,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "74058240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565108.711, "dur": 0.285, + "args": { + "External id": 452692,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565114.007, "dur": 0.464, + "args": { + "External id": 452693,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "76155904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565115.190, "dur": 0.472, + "args": { + "External id": 452694,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "83495936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565116.750, "dur": 2.403, + "args": { + "External id": 452695,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "90835968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565122.834, "dur": 0.257, + "args": { + "External id": 452696,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565123.776, "dur": 0.347, + "args": { + "External id": 452697,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "98176512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565124.955, "dur": 2.656, + "args": { + "External id": 452698,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100273664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565130.553, "dur": 0.489, + "args": { + "External id": 452699,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100797952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565131.517, "dur": 0.339, + "args": { + "External id": 452700,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "101322240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565137.682, "dur": 0.243, + "args": { + "External id": 452701,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565138.552, "dur": 0.375, + "args": { + "External id": 452702,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "103419904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565139.676, "dur": 2.322, + "args": { + "External id": 452703,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "110759936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565145.297, "dur": 0.316, + "args": { + "External id": 452704,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "118099968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565146.263, "dur": 0.541, + "args": { + "External id": 452705,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565147.536, "dur": 2.474, + "args": { + "External id": 452706,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "125440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449565193.363, "dur": 155.338, + "args": { + "External id": 452707,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449565433.303, "dur": 353.475, + "args": { + "External id": 452708,"Record function id": 0, "Concrete Inputs": ["", "", "141824512", "8", "1", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 11944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449565452.570, "dur": 5.357, + "args": { + "External id": 452709,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449565465.270, "dur": 13.842, + "args": { + "External id": 452710,"Record function id": 0, "Concrete Inputs": ["", "0", "141824512", "141824512"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 11946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449565469.855, "dur": 8.810, + "args": { + "External id": 452711,"Record function id": 0, "Concrete Inputs": ["", "0", "141824512", "283649024", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[1134596096], [], [], [], []], "Ev Idx": 11947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565475.201, "dur": 0.831, + "args": { + "External id": 452712,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "[1]", "141824512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 11948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449565488.180, "dur": 137.650, + "args": { + "External id": 452713,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 11949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565489.985, "dur": 0.509, + "args": { + "External id": 452714,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "141824512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565491.757, "dur": 0.532, + "args": { + "External id": 452715,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "158208512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565495.084, "dur": 2.803, + "args": { + "External id": 452716,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "158209024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565498.691, "dur": 0.885, + "args": { + "External id": 452717,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "160306176"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565502.277, "dur": 0.373, + "args": { + "External id": 452718,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "160830464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565503.387, "dur": 0.322, + "args": { + "External id": 452719,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "161354752"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565504.562, "dur": 0.375, + "args": { + "External id": 452720,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "163451904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565507.627, "dur": 0.444, + "args": { + "External id": 452721,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "163452416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565508.615, "dur": 0.822, + "args": { + "External id": 452722,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "170792448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565510.047, "dur": 0.569, + "args": { + "External id": 452723,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "178132480"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565513.483, "dur": 2.310, + "args": { + "External id": 452724,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "185472512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565516.710, "dur": 0.321, + "args": { + "External id": 452725,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "185473024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565519.725, "dur": 2.422, + "args": { + "External id": 452726,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "187570176"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565523.086, "dur": 0.384, + "args": { + "External id": 452727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "188094464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565524.169, "dur": 0.256, + "args": { + "External id": 452728,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "188618752"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565529.225, "dur": 0.447, + "args": { + "External id": 452729,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "190715904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565530.377, "dur": 0.481, + "args": { + "External id": 452730,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "190716416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565531.483, "dur": 0.393, + "args": { + "External id": 452731,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "198056448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565536.760, "dur": 2.529, + "args": { + "External id": 452732,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "205396480"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565539.889, "dur": 0.285, + "args": { + "External id": 452733,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "212736512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565542.702, "dur": 2.588, + "args": { + "External id": 452734,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "212737024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565545.974, "dur": 0.449, + "args": { + "External id": 452735,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "214834176"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565547.254, "dur": 0.144, + "args": { + "External id": 452736,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "215358464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565553.084, "dur": 0.361, + "args": { + "External id": 452737,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "215882752"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565554.057, "dur": 0.467, + "args": { + "External id": 452738,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "217979904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565555.225, "dur": 0.324, + "args": { + "External id": 452739,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "217980416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565560.587, "dur": 2.339, + "args": { + "External id": 452740,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "225320448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565563.549, "dur": 0.335, + "args": { + "External id": 452741,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "232660480"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565566.874, "dur": 1.879, + "args": { + "External id": 452742,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "240000512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565569.816, "dur": 0.288, + "args": { + "External id": 452743,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "240001024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565570.651, "dur": 0.290, + "args": { + "External id": 452744,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "242098176"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565575.995, "dur": 0.475, + "args": { + "External id": 452745,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "242622464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565577.013, "dur": 0.252, + "args": { + "External id": 452746,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "243146752"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565577.710, "dur": 0.370, + "args": { + "External id": 452747,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "245243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565582.652, "dur": 2.258, + "args": { + "External id": 452748,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "245244416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565585.476, "dur": 0.255, + "args": { + "External id": 452749,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "252584448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565588.032, "dur": 2.164, + "args": { + "External id": 452750,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "259924480"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565590.970, "dur": 0.374, + "args": { + "External id": 452751,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "267264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449565608.100, "dur": 0.456, + "args": { + "External id": 452752,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "267265024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449565649.127, "dur": 120.855, + "args": { + "External id": 452753,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449565851.704, "dur": 345.633, + "args": { + "External id": 452754,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[1134596096], [141824512], [], [], []], "Ev Idx": 11990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449565888.156, "dur": 303.032, + "args": { + "External id": 452755,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1134596096, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[141824512], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11991, "In msg nelems": 141824512 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449565899.084, "dur": 285.452, + "args": { + "External id": 452756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[141824512]], "Ev Idx": 11992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449566227.686, "dur": 2.295, + "args": { + "External id": 452757,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11993, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2338707, "tid": 2338707, + "ts": 6267449566332.808, "dur": 12255.213, + "args": { + "External id": 452758,"Record function id": 0, "Ev Idx": 11994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566558.294, "dur": 7.192, + "args": { + "External id": 452759,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 11995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566569.218, "dur": 0.889, + "args": { + "External id": 452760,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 11996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566571.797, "dur": 1.266, + "args": { + "External id": 452761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566574.750, "dur": 3.510, + "args": { + "External id": 452762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566579.780, "dur": 0.913, + "args": { + "External id": 452763,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566582.259, "dur": 1.195, + "args": { + "External id": 452764,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566587.353, "dur": 0.795, + "args": { + "External id": 452765,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566589.395, "dur": 1.970, + "args": { + "External id": 452766,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566612.480, "dur": 1.348, + "args": { + "External id": 452767,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566615.627, "dur": 0.817, + "args": { + "External id": 452768,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566620.023, "dur": 1.089, + "args": { + "External id": 452769,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566622.327, "dur": 2.614, + "args": { + "External id": 452770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566626.313, "dur": 0.870, + "args": { + "External id": 452771,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566628.416, "dur": 1.231, + "args": { + "External id": 452772,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566633.422, "dur": 0.685, + "args": { + "External id": 452773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566635.322, "dur": 2.285, + "args": { + "External id": 452774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566641.226, "dur": 0.811, + "args": { + "External id": 452775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566643.155, "dur": 1.033, + "args": { + "External id": 452776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566647.650, "dur": 1.000, + "args": { + "External id": 452777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566649.810, "dur": 2.799, + "args": { + "External id": 452778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566653.808, "dur": 0.858, + "args": { + "External id": 452779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566656.029, "dur": 1.185, + "args": { + "External id": 452780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566660.627, "dur": 0.713, + "args": { + "External id": 452781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566662.367, "dur": 2.009, + "args": { + "External id": 452782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566684.260, "dur": 0.894, + "args": { + "External id": 452783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566686.784, "dur": 0.803, + "args": { + "External id": 452784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566691.328, "dur": 1.041, + "args": { + "External id": 452785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566693.556, "dur": 3.182, + "args": { + "External id": 452786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566697.903, "dur": 1.060, + "args": { + "External id": 452787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566700.079, "dur": 0.693, + "args": { + "External id": 452788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566704.403, "dur": 0.941, + "args": { + "External id": 452789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566706.443, "dur": 1.923, + "args": { + "External id": 452790,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566711.647, "dur": 0.896, + "args": { + "External id": 452791,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566713.644, "dur": 0.765, + "args": { + "External id": 452792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566717.637, "dur": 1.042, + "args": { + "External id": 452793,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566719.814, "dur": 2.387, + "args": { + "External id": 452794,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566723.650, "dur": 0.806, + "args": { + "External id": 452795,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566725.542, "dur": 1.134, + "args": { + "External id": 452796,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566730.351, "dur": 0.711, + "args": { + "External id": 452797,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449566732.172, "dur": 1.719, + "args": { + "External id": 452798,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 12034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449566764.173, "dur": 11765.370, + "args": { + "External id": 452799,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 12035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449566788.996, "dur": 11730.157, + "args": { + "External id": 452800,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 12036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449566809.158, "dur": 5.891, + "args": { + "External id": 452801,"Record function id": 0, "Concrete Inputs": ["[4384]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449566820.419, "dur": 11653.009, + "args": { + "External id": 452802,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], [], []], "Ev Idx": 12038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449566825.601, "dur": 11647.202, + "args": { + "External id": 452803,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], []], "Ev Idx": 12039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449566833.848, "dur": 6.369, + "args": { + "External id": 452804,"Record function id": 0, "Concrete Inputs": ["[4384]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449566841.955, "dur": 11627.531, + "args": { + "External id": 452805,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4384], [4384], []], "Ev Idx": 12041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449578914.130, "dur": 36.121, + "args": { + "External id": 452806,"Record function id": 0, "Ev Idx": 12042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2338707, "tid": 2338707, + "ts": 6267449578951.584, "dur": 312.990, + "args": { + "External id": 452807,"Record function id": 0, "Ev Idx": 12043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449578998.565, "dur": 255.554, + "args": { + "External id": 452808,"Sequence number": 5036366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32000, 4096], [8, 4096]], "Ev Idx": 12044 + } + }, + { + "ph": "s", "id": 224, "pid": 2338707, "tid": 2338707, "ts": 6267449578998.565, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449579151.608, "dur": 60.562, + "args": { + "External id": 452809,"kernel_hash": "coj4cw2xx67czwyji56kkta4pwcipuxvxsnrrkf7vsjjhsb6mejv", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oj/coj4cw2xx67czwyji56kkta4pwcipuxvxsnrrkf7vsjjhsb6mejv.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096], [32000, 4096], [8, 4096, 4096], []], "Ev Idx": 12045 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449579333.242, "dur": 61.570, + "args": { + "External id": 452810,"Record function id": 0, "Ev Idx": 12046 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2338707, "tid": 2338707, + "ts": 6267449579406.621, "dur": 9445.076, + "args": { + "External id": 452811,"Record function id": 0, "Ev Idx": 12047 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338707, "tid": 2338707, + "ts": 6267449579416.294, "dur": 977.024, + "args": { + "External id": 452812,"Record function id": 0, "Ev Idx": 12048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449579501.311, "dur": 13.730, + "args": { + "External id": 452813,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449579529.651, "dur": 45.981, + "args": { + "External id": 452814,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579539.937, "dur": 2.687, + "args": { + "External id": 452815,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579544.325, "dur": 0.439, + "args": { + "External id": 452816,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579545.501, "dur": 3.046, + "args": { + "External id": 452817,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579551.500, "dur": 0.418, + "args": { + "External id": 452818,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579552.556, "dur": 0.343, + "args": { + "External id": 452819,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579558.339, "dur": 0.593, + "args": { + "External id": 452820,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579559.621, "dur": 0.237, + "args": { + "External id": 452821,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579560.748, "dur": 3.147, + "args": { + "External id": 452822,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579568.726, "dur": 0.395, + "args": { + "External id": 452823,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449579587.039, "dur": 96.118, + "args": { + "External id": 452824,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449579719.909, "dur": 133.204, + "args": { + "External id": 452825,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449579732.118, "dur": 7.059, + "args": { + "External id": 452826,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449579745.033, "dur": 13.351, + "args": { + "External id": 452827,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449579749.855, "dur": 8.143, + "args": { + "External id": 452828,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579755.518, "dur": 0.719, + "args": { + "External id": 452829,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449579765.272, "dur": 31.227, + "args": { + "External id": 452830,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579766.582, "dur": 0.387, + "args": { + "External id": 452831,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579770.211, "dur": 0.558, + "args": { + "External id": 452832,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579774.076, "dur": 0.742, + "args": { + "External id": 452833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579777.226, "dur": 0.561, + "args": { + "External id": 452834,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579778.443, "dur": 4.696, + "args": { + "External id": 452835,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579783.777, "dur": 0.764, + "args": { + "External id": 452836,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579787.167, "dur": 0.259, + "args": { + "External id": 452837,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579790.344, "dur": 0.367, + "args": { + "External id": 452838,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449579791.345, "dur": 0.405, + "args": { + "External id": 452839,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449579810.482, "dur": 33.189, + "args": { + "External id": 452840,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449579910.211, "dur": 377.529, + "args": { + "External id": 452841,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449579945.314, "dur": 336.666, + "args": { + "External id": 452842,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12078, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449579955.704, "dur": 319.369, + "args": { + "External id": 452843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449580315.975, "dur": 2.416, + "args": { + "External id": 452844,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12080, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338707, "tid": 2338707, + "ts": 6267449580414.618, "dur": 8214.989, + "args": { + "External id": 452845,"Record function id": 0, "Ev Idx": 12081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449580525.267, "dur": 6.913, + "args": { + "External id": 452846,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449580535.775, "dur": 1.465, + "args": { + "External id": 452847,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449580538.707, "dur": 1.222, + "args": { + "External id": 452848,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449580541.457, "dur": 3.733, + "args": { + "External id": 452849,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449580548.606, "dur": 1.221, + "args": { + "External id": 452850,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449580551.128, "dur": 1.169, + "args": { + "External id": 452851,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449580554.001, "dur": 1.298, + "args": { + "External id": 452852,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449580559.283, "dur": 2.010, + "args": { + "External id": 452853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449580565.054, "dur": 0.948, + "args": { + "External id": 452854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449580567.212, "dur": 1.010, + "args": { + "External id": 452855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449580585.756, "dur": 7977.589, + "args": { + "External id": 452856,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449580626.855, "dur": 7928.380, + "args": { + "External id": 452857,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449580650.542, "dur": 16.698, + "args": { + "External id": 452858,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449580672.980, "dur": 7846.569, + "args": { + "External id": 452859,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449580678.378, "dur": 7840.694, + "args": { + "External id": 452860,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449580684.509, "dur": 7.849, + "args": { + "External id": 452861,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449580694.198, "dur": 7822.325, + "args": { + "External id": 452862,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449588790.927, "dur": 35.306, + "args": { + "External id": 452863,"Sequence number": 5036367, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12099 + } + }, + { + "ph": "s", "id": 223, "pid": 2338707, "tid": 2338707, "ts": 6267449588790.927, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449588809.588, "dur": 11.793, + "args": { + "External id": 452864,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449588815.894, "dur": 5.236, + "args": { + "External id": 452865,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449588895.306, "dur": 97.378, + "args": { + "External id": 452866,"Record function id": 0, "Ev Idx": 12102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449588993.695, "dur": 1270.053, + "args": { + "External id": 452867,"Record function id": 0, "Ev Idx": 12103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449589034.851, "dur": 1212.898, + "args": { + "External id": 452868,"Sequence number": 5036368, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12104 + } + }, + { + "ph": "s", "id": 222, "pid": 2338707, "tid": 2338707, "ts": 6267449589034.851, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449589148.505, "dur": 54.207, + "args": { + "External id": 452869,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449589218.114, "dur": 115.680, + "args": { + "External id": 452870,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449589347.724, "dur": 38.590, + "args": { + "External id": 452871,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449589395.740, "dur": 30.131, + "args": { + "External id": 452872,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449589453.171, "dur": 29.195, + "args": { + "External id": 452873,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449589506.966, "dur": 22.495, + "args": { + "External id": 452874,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449589555.864, "dur": 168.936, + "args": { + "External id": 452875,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449589636.255, "dur": 15.104, + "args": { + "External id": 452876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449589641.789, "dur": 8.429, + "args": { + "External id": 452877,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449589653.949, "dur": 3.795, + "args": { + "External id": 452878,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449589661.223, "dur": 1.081, + "args": { + "External id": 452879,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449589665.126, "dur": 2.809, + "args": { + "External id": 452880,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449589736.529, "dur": 54.644, + "args": { + "External id": 452881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449589827.328, "dur": 32.784, + "args": { + "External id": 452882,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449589867.634, "dur": 45.473, + "args": { + "External id": 452883,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449589923.795, "dur": 35.950, + "args": { + "External id": 452884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449589982.803, "dur": 29.309, + "args": { + "External id": 452885,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449590021.080, "dur": 91.128, + "args": { + "External id": 452886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449590144.749, "dur": 22.310, + "args": { + "External id": 452887,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12123 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2338707, "tid": 2338707, + "ts": 6267449590334.903, "dur": 94.752, + "args": { + "External id": 452888,"Record function id": 0, "Ev Idx": 12124 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449590505.385, "dur": 47.852, + "args": { + "External id": 452889,"Record function id": 0, "Ev Idx": 12125 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2338707, "tid": 2338707, + "ts": 6267449590562.615, "dur": 29574.203, + "args": { + "External id": 452890,"Record function id": 0, "Ev Idx": 12126 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338707, "tid": 2338707, + "ts": 6267449590573.656, "dur": 1008.125, + "args": { + "External id": 452891,"Record function id": 0, "Ev Idx": 12127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449590696.602, "dur": 10.678, + "args": { + "External id": 452892,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449590721.388, "dur": 47.075, + "args": { + "External id": 452893,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590729.581, "dur": 4.308, + "args": { + "External id": 452894,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590735.704, "dur": 0.333, + "args": { + "External id": 452895,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590739.169, "dur": 0.497, + "args": { + "External id": 452896,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590742.925, "dur": 0.453, + "args": { + "External id": 452897,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590744.254, "dur": 2.866, + "args": { + "External id": 452898,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590750.465, "dur": 0.594, + "args": { + "External id": 452899,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590754.804, "dur": 0.441, + "args": { + "External id": 452900,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590755.874, "dur": 0.345, + "args": { + "External id": 452901,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590759.364, "dur": 2.328, + "args": { + "External id": 452902,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449590780.562, "dur": 56.405, + "args": { + "External id": 452903,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449590873.000, "dur": 133.419, + "args": { + "External id": 452904,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449590889.390, "dur": 4.142, + "args": { + "External id": 452905,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449590899.317, "dur": 10.499, + "args": { + "External id": 452906,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449590904.437, "dur": 4.984, + "args": { + "External id": 452907,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590907.745, "dur": 0.529, + "args": { + "External id": 452908,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449590916.400, "dur": 33.339, + "args": { + "External id": 452909,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590920.229, "dur": 0.654, + "args": { + "External id": 452910,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590922.047, "dur": 2.127, + "args": { + "External id": 452911,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590924.668, "dur": 2.646, + "args": { + "External id": 452912,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590930.310, "dur": 0.611, + "args": { + "External id": 452913,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590931.728, "dur": 0.417, + "args": { + "External id": 452914,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590935.145, "dur": 0.345, + "args": { + "External id": 452915,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590938.563, "dur": 0.433, + "args": { + "External id": 452916,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590939.739, "dur": 0.304, + "args": { + "External id": 452917,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449590945.171, "dur": 0.366, + "args": { + "External id": 452918,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449590962.757, "dur": 32.629, + "args": { + "External id": 452919,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449591108.981, "dur": 370.803, + "args": { + "External id": 452920,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449591143.285, "dur": 331.135, + "args": { + "External id": 452921,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12157, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449591154.580, "dur": 314.559, + "args": { + "External id": 452922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449591504.959, "dur": 2.280, + "args": { + "External id": 452923,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12159, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338707, "tid": 2338707, + "ts": 6267449591634.662, "dur": 28264.719, + "args": { + "External id": 452924,"Record function id": 0, "Ev Idx": 12160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449591747.755, "dur": 7.332, + "args": { + "External id": 452925,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449591758.997, "dur": 1.058, + "args": { + "External id": 452926,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449591761.638, "dur": 1.113, + "args": { + "External id": 452927,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449591764.431, "dur": 1.163, + "args": { + "External id": 452928,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449591766.960, "dur": 1.183, + "args": { + "External id": 452929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449591769.558, "dur": 0.951, + "args": { + "External id": 452930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449591775.046, "dur": 1.200, + "args": { + "External id": 452931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449591777.422, "dur": 5.487, + "args": { + "External id": 452932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449591784.117, "dur": 0.749, + "args": { + "External id": 452933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449591786.255, "dur": 0.624, + "args": { + "External id": 452934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449591807.592, "dur": 28047.323, + "args": { + "External id": 452935,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449591827.539, "dur": 28018.512, + "args": { + "External id": 452936,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449591850.393, "dur": 16.086, + "args": { + "External id": 452937,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449591874.730, "dur": 27934.163, + "args": { + "External id": 452938,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449591877.272, "dur": 27931.020, + "args": { + "External id": 452939,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449591883.477, "dur": 6.842, + "args": { + "External id": 452940,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449591892.354, "dur": 27912.733, + "args": { + "External id": 452941,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449620049.112, "dur": 61.568, + "args": { + "External id": 452942,"Sequence number": 5036369, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12178 + } + }, + { + "ph": "s", "id": 221, "pid": 2338707, "tid": 2338707, "ts": 6267449620049.112, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449620093.754, "dur": 11.560, + "args": { + "External id": 452943,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449620099.733, "dur": 5.246, + "args": { + "External id": 452944,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449620180.153, "dur": 91.023, + "args": { + "External id": 452945,"Record function id": 0, "Ev Idx": 12181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449620272.748, "dur": 1170.136, + "args": { + "External id": 452946,"Record function id": 0, "Ev Idx": 12182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449620311.907, "dur": 1117.050, + "args": { + "External id": 452947,"Sequence number": 5036370, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12183 + } + }, + { + "ph": "s", "id": 220, "pid": 2338707, "tid": 2338707, "ts": 6267449620311.907, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449620393.253, "dur": 55.233, + "args": { + "External id": 452948,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449620463.631, "dur": 109.901, + "args": { + "External id": 452949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449620583.615, "dur": 60.040, + "args": { + "External id": 452950,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449620653.465, "dur": 31.294, + "args": { + "External id": 452951,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449620718.641, "dur": 28.764, + "args": { + "External id": 452952,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449620765.396, "dur": 23.277, + "args": { + "External id": 452953,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449620807.173, "dur": 145.783, + "args": { + "External id": 452954,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449620863.841, "dur": 17.648, + "args": { + "External id": 452955,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449620869.206, "dur": 11.409, + "args": { + "External id": 452956,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449620887.048, "dur": 4.323, + "args": { + "External id": 452957,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449620892.469, "dur": 3.017, + "args": { + "External id": 452958,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449620897.749, "dur": 3.059, + "args": { + "External id": 452959,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449620964.234, "dur": 50.618, + "args": { + "External id": 452960,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449621047.910, "dur": 71.727, + "args": { + "External id": 452961,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449621133.730, "dur": 47.281, + "args": { + "External id": 452962,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449621189.274, "dur": 34.786, + "args": { + "External id": 452963,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449621248.513, "dur": 26.470, + "args": { + "External id": 452964,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449621281.037, "dur": 36.068, + "args": { + "External id": 452965,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449621337.826, "dur": 20.667, + "args": { + "External id": 452966,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12202 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2338707, "tid": 2338707, + "ts": 6267449621510.078, "dur": 106.763, + "args": { + "External id": 452967,"Record function id": 0, "Ev Idx": 12203 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449621701.831, "dur": 51.095, + "args": { + "External id": 452968,"Record function id": 0, "Ev Idx": 12204 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2338707, "tid": 2338707, + "ts": 6267449621762.052, "dur": 30439.667, + "args": { + "External id": 452969,"Record function id": 0, "Ev Idx": 12205 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338707, "tid": 2338707, + "ts": 6267449621773.722, "dur": 1059.494, + "args": { + "External id": 452970,"Record function id": 0, "Ev Idx": 12206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449621862.066, "dur": 10.526, + "args": { + "External id": 452971,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449621889.318, "dur": 42.615, + "args": { + "External id": 452972,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449621895.044, "dur": 2.234, + "args": { + "External id": 452973,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449621901.127, "dur": 0.466, + "args": { + "External id": 452974,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449621904.887, "dur": 0.600, + "args": { + "External id": 452975,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449621906.248, "dur": 0.551, + "args": { + "External id": 452976,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449621910.664, "dur": 0.494, + "args": { + "External id": 452977,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449621914.557, "dur": 0.417, + "args": { + "External id": 452978,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449621915.872, "dur": 4.491, + "args": { + "External id": 452979,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449621923.173, "dur": 0.459, + "args": { + "External id": 452980,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449621924.646, "dur": 0.286, + "args": { + "External id": 452981,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449621944.043, "dur": 56.741, + "args": { + "External id": 452982,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449622036.795, "dur": 178.229, + "args": { + "External id": 452983,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449622049.568, "dur": 42.481, + "args": { + "External id": 452984,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449622099.955, "dur": 11.852, + "args": { + "External id": 452985,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449622105.328, "dur": 6.041, + "args": { + "External id": 452986,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449622109.179, "dur": 0.529, + "args": { + "External id": 452987,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449622119.676, "dur": 37.410, + "args": { + "External id": 452988,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449622124.349, "dur": 2.611, + "args": { + "External id": 452989,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449622128.051, "dur": 0.531, + "args": { + "External id": 452990,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449622129.419, "dur": 0.542, + "args": { + "External id": 452991,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449622135.120, "dur": 2.229, + "args": { + "External id": 452992,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449622138.109, "dur": 0.436, + "args": { + "External id": 452993,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449622141.355, "dur": 0.405, + "args": { + "External id": 452994,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449622145.056, "dur": 0.359, + "args": { + "External id": 452995,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449622146.081, "dur": 0.491, + "args": { + "External id": 452996,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449622149.749, "dur": 2.235, + "args": { + "External id": 452997,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449622169.206, "dur": 36.603, + "args": { + "External id": 452998,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449622272.681, "dur": 449.010, + "args": { + "External id": 452999,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449622308.095, "dur": 407.685, + "args": { + "External id": 453000,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12236, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449622320.014, "dur": 389.052, + "args": { + "External id": 453001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449622751.210, "dur": 2.957, + "args": { + "External id": 453002,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12238, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338707, "tid": 2338707, + "ts": 6267449622856.081, "dur": 29102.916, + "args": { + "External id": 453003,"Record function id": 0, "Ev Idx": 12239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449622972.300, "dur": 7.148, + "args": { + "External id": 453004,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449622983.400, "dur": 1.075, + "args": { + "External id": 453005,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449622986.421, "dur": 3.547, + "args": { + "External id": 453006,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449622991.837, "dur": 0.665, + "args": { + "External id": 453007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449622994.073, "dur": 1.010, + "args": { + "External id": 453008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449622996.347, "dur": 1.110, + "args": { + "External id": 453009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449623001.432, "dur": 1.055, + "args": { + "External id": 453010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449623004.311, "dur": 3.093, + "args": { + "External id": 453011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449623008.622, "dur": 0.974, + "args": { + "External id": 453012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449623011.082, "dur": 0.864, + "args": { + "External id": 453013,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449623033.437, "dur": 28882.641, + "args": { + "External id": 453014,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449623049.442, "dur": 28858.110, + "args": { + "External id": 453015,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449623113.133, "dur": 17.004, + "args": { + "External id": 453016,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449623134.897, "dur": 28737.644, + "args": { + "External id": 453017,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449623137.847, "dur": 28734.157, + "args": { + "External id": 453018,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449623144.033, "dur": 5.838, + "args": { + "External id": 453019,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449623151.722, "dur": 28717.655, + "args": { + "External id": 453020,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449652141.906, "dur": 33.686, + "args": { + "External id": 453021,"Sequence number": 5036371, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12257 + } + }, + { + "ph": "s", "id": 219, "pid": 2338707, "tid": 2338707, "ts": 6267449652141.906, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449652161.546, "dur": 9.194, + "args": { + "External id": 453022,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449652165.135, "dur": 5.256, + "args": { + "External id": 453023,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449652245.803, "dur": 82.382, + "args": { + "External id": 453024,"Record function id": 0, "Ev Idx": 12260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449652329.605, "dur": 1159.524, + "args": { + "External id": 453025,"Record function id": 0, "Ev Idx": 12261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449652375.128, "dur": 1098.800, + "args": { + "External id": 453026,"Sequence number": 5036372, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12262 + } + }, + { + "ph": "s", "id": 218, "pid": 2338707, "tid": 2338707, "ts": 6267449652375.128, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449652446.439, "dur": 52.391, + "args": { + "External id": 453027,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449652512.604, "dur": 116.040, + "args": { + "External id": 453028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449652644.000, "dur": 40.460, + "args": { + "External id": 453029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449652695.044, "dur": 29.685, + "args": { + "External id": 453030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449652751.577, "dur": 27.710, + "args": { + "External id": 453031,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449652799.374, "dur": 19.435, + "args": { + "External id": 453032,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449652842.587, "dur": 137.717, + "args": { + "External id": 453033,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449652895.888, "dur": 14.688, + "args": { + "External id": 453034,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449652904.618, "dur": 5.126, + "args": { + "External id": 453035,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449652913.144, "dur": 5.038, + "args": { + "External id": 453036,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449652919.309, "dur": 0.845, + "args": { + "External id": 453037,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449652922.767, "dur": 5.601, + "args": { + "External id": 453038,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449652992.014, "dur": 47.975, + "args": { + "External id": 453039,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449653114.701, "dur": 35.833, + "args": { + "External id": 453040,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449653162.119, "dur": 47.611, + "args": { + "External id": 453041,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449653219.210, "dur": 35.614, + "args": { + "External id": 453042,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449653278.678, "dur": 30.659, + "args": { + "External id": 453043,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449653317.744, "dur": 36.169, + "args": { + "External id": 453044,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449653376.928, "dur": 19.379, + "args": { + "External id": 453045,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12281 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2338707, "tid": 2338707, + "ts": 6267449653558.707, "dur": 103.084, + "args": { + "External id": 453046,"Record function id": 0, "Ev Idx": 12282 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449653742.047, "dur": 50.429, + "args": { + "External id": 453047,"Record function id": 0, "Ev Idx": 12283 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2338707, "tid": 2338707, + "ts": 6267449653802.187, "dur": 29504.676, + "args": { + "External id": 453048,"Record function id": 0, "Ev Idx": 12284 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338707, "tid": 2338707, + "ts": 6267449653814.923, "dur": 1048.587, + "args": { + "External id": 453049,"Record function id": 0, "Ev Idx": 12285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449653902.985, "dur": 10.025, + "args": { + "External id": 453050,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449653927.301, "dur": 49.199, + "args": { + "External id": 453051,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449653933.198, "dur": 2.355, + "args": { + "External id": 453052,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449653941.802, "dur": 0.425, + "args": { + "External id": 453053,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449653943.151, "dur": 0.315, + "args": { + "External id": 453054,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449653950.239, "dur": 0.489, + "args": { + "External id": 453055,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449653956.430, "dur": 0.621, + "args": { + "External id": 453056,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449653957.760, "dur": 0.638, + "args": { + "External id": 453057,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449653961.840, "dur": 4.183, + "args": { + "External id": 453058,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449653966.831, "dur": 0.524, + "args": { + "External id": 453059,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449653968.080, "dur": 0.621, + "args": { + "External id": 453060,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449653989.703, "dur": 60.206, + "args": { + "External id": 453061,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449654124.997, "dur": 158.120, + "args": { + "External id": 453062,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449654139.815, "dur": 5.888, + "args": { + "External id": 453063,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449654151.436, "dur": 12.377, + "args": { + "External id": 453064,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449654156.852, "dur": 6.535, + "args": { + "External id": 453065,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449654160.523, "dur": 0.957, + "args": { + "External id": 453066,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449654173.424, "dur": 35.579, + "args": { + "External id": 453067,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449654175.389, "dur": 2.838, + "args": { + "External id": 453068,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449654179.280, "dur": 0.321, + "args": { + "External id": 453069,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449654183.026, "dur": 0.286, + "args": { + "External id": 453070,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449654187.057, "dur": 2.919, + "args": { + "External id": 453071,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449654192.650, "dur": 0.246, + "args": { + "External id": 453072,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449654193.594, "dur": 0.262, + "args": { + "External id": 453073,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449654196.981, "dur": 0.326, + "args": { + "External id": 453074,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449654200.313, "dur": 0.463, + "args": { + "External id": 453075,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449654201.438, "dur": 2.509, + "args": { + "External id": 453076,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449654233.123, "dur": 41.042, + "args": { + "External id": 453077,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449654342.602, "dur": 411.193, + "args": { + "External id": 453078,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449654381.430, "dur": 363.276, + "args": { + "External id": 453079,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12315, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449654395.033, "dur": 343.266, + "args": { + "External id": 453080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449654782.071, "dur": 2.610, + "args": { + "External id": 453081,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12317, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338707, "tid": 2338707, + "ts": 6267449654885.597, "dur": 28210.374, + "args": { + "External id": 453082,"Record function id": 0, "Ev Idx": 12318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449654996.384, "dur": 6.951, + "args": { + "External id": 453083,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449655006.575, "dur": 1.222, + "args": { + "External id": 453084,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449655009.834, "dur": 3.543, + "args": { + "External id": 453085,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449655015.380, "dur": 0.933, + "args": { + "External id": 453086,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449655017.792, "dur": 0.901, + "args": { + "External id": 453087,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449655019.983, "dur": 1.106, + "args": { + "External id": 453088,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449655024.775, "dur": 1.076, + "args": { + "External id": 453089,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449655027.173, "dur": 3.177, + "args": { + "External id": 453090,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449655031.709, "dur": 1.003, + "args": { + "External id": 453091,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449655034.012, "dur": 1.193, + "args": { + "External id": 453092,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449655093.549, "dur": 27924.639, + "args": { + "External id": 453093,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449655110.305, "dur": 27899.724, + "args": { + "External id": 453094,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449655131.159, "dur": 18.554, + "args": { + "External id": 453095,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449655153.683, "dur": 27818.977, + "args": { + "External id": 453096,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449655156.787, "dur": 27815.339, + "args": { + "External id": 453097,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449655165.582, "dur": 6.576, + "args": { + "External id": 453098,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449655173.840, "dur": 27795.453, + "args": { + "External id": 453099,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449683244.564, "dur": 37.040, + "args": { + "External id": 453100,"Sequence number": 5036373, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12336 + } + }, + { + "ph": "s", "id": 217, "pid": 2338707, "tid": 2338707, "ts": 6267449683244.564, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449683267.452, "dur": 8.999, + "args": { + "External id": 453101,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449683271.226, "dur": 4.956, + "args": { + "External id": 453102,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449683349.794, "dur": 79.226, + "args": { + "External id": 453103,"Record function id": 0, "Ev Idx": 12339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449683433.082, "dur": 1227.988, + "args": { + "External id": 453104,"Record function id": 0, "Ev Idx": 12340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449683472.961, "dur": 1172.197, + "args": { + "External id": 453105,"Sequence number": 5036374, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12341 + } + }, + { + "ph": "s", "id": 216, "pid": 2338707, "tid": 2338707, "ts": 6267449683472.961, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449683540.186, "dur": 69.697, + "args": { + "External id": 453106,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449683626.140, "dur": 107.406, + "args": { + "External id": 453107,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449683749.689, "dur": 42.466, + "args": { + "External id": 453108,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449683798.674, "dur": 29.105, + "args": { + "External id": 453109,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449683863.616, "dur": 31.266, + "args": { + "External id": 453110,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449683913.954, "dur": 18.086, + "args": { + "External id": 453111,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449683955.273, "dur": 179.103, + "args": { + "External id": 453112,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449684010.213, "dur": 11.504, + "args": { + "External id": 453113,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449684015.371, "dur": 5.545, + "args": { + "External id": 453114,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449684024.011, "dur": 4.205, + "args": { + "External id": 453115,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449684029.616, "dur": 1.395, + "args": { + "External id": 453116,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449684035.620, "dur": 4.482, + "args": { + "External id": 453117,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449684147.966, "dur": 59.121, + "args": { + "External id": 453118,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449684246.388, "dur": 34.298, + "args": { + "External id": 453119,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449684292.204, "dur": 43.790, + "args": { + "External id": 453120,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449684376.575, "dur": 37.641, + "args": { + "External id": 453121,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449684444.874, "dur": 27.494, + "args": { + "External id": 453122,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449684477.721, "dur": 36.967, + "args": { + "External id": 453123,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449684532.707, "dur": 23.566, + "args": { + "External id": 453124,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12360 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2338707, "tid": 2338707, + "ts": 6267449684731.353, "dur": 89.647, + "args": { + "External id": 453125,"Record function id": 0, "Ev Idx": 12361 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449684899.132, "dur": 51.680, + "args": { + "External id": 453126,"Record function id": 0, "Ev Idx": 12362 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2338707, "tid": 2338707, + "ts": 6267449684960.400, "dur": 30520.721, + "args": { + "External id": 453127,"Record function id": 0, "Ev Idx": 12363 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338707, "tid": 2338707, + "ts": 6267449684972.412, "dur": 983.855, + "args": { + "External id": 453128,"Record function id": 0, "Ev Idx": 12364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449685102.327, "dur": 11.191, + "args": { + "External id": 453129,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449685128.152, "dur": 41.051, + "args": { + "External id": 453130,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685136.051, "dur": 2.323, + "args": { + "External id": 453131,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685142.354, "dur": 0.349, + "args": { + "External id": 453132,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685143.355, "dur": 0.581, + "args": { + "External id": 453133,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685146.966, "dur": 0.388, + "args": { + "External id": 453134,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685149.190, "dur": 0.466, + "args": { + "External id": 453135,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685152.624, "dur": 0.570, + "args": { + "External id": 453136,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685153.840, "dur": 4.361, + "args": { + "External id": 453137,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685158.828, "dur": 0.259, + "args": { + "External id": 453138,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685161.754, "dur": 0.248, + "args": { + "External id": 453139,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449685181.049, "dur": 58.781, + "args": { + "External id": 453140,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449685274.914, "dur": 133.086, + "args": { + "External id": 453141,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449685287.491, "dur": 3.802, + "args": { + "External id": 453142,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449685296.635, "dur": 12.624, + "args": { + "External id": 453143,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449685301.500, "dur": 7.345, + "args": { + "External id": 453144,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685307.021, "dur": 0.582, + "args": { + "External id": 453145,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449685316.047, "dur": 32.573, + "args": { + "External id": 453146,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685317.212, "dur": 2.499, + "args": { + "External id": 453147,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685323.085, "dur": 0.357, + "args": { + "External id": 453148,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685323.988, "dur": 0.396, + "args": { + "External id": 453149,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685329.787, "dur": 2.954, + "args": { + "External id": 453150,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685333.370, "dur": 0.462, + "args": { + "External id": 453151,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685334.534, "dur": 0.260, + "args": { + "External id": 453152,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685339.273, "dur": 0.364, + "args": { + "External id": 453153,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685340.219, "dur": 0.460, + "args": { + "External id": 453154,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449685341.132, "dur": 2.384, + "args": { + "External id": 453155,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449685362.394, "dur": 37.318, + "args": { + "External id": 453156,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449685466.065, "dur": 390.390, + "args": { + "External id": 453157,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449685507.145, "dur": 343.191, + "args": { + "External id": 453158,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12394, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449685517.077, "dur": 325.106, + "args": { + "External id": 453159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449685881.536, "dur": 2.268, + "args": { + "External id": 453160,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12396, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338707, "tid": 2338707, + "ts": 6267449685976.656, "dur": 29303.916, + "args": { + "External id": 453161,"Record function id": 0, "Ev Idx": 12397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449686128.979, "dur": 7.237, + "args": { + "External id": 453162,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449686139.979, "dur": 1.148, + "args": { + "External id": 453163,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449686143.435, "dur": 3.440, + "args": { + "External id": 453164,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449686148.415, "dur": 1.004, + "args": { + "External id": 453165,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449686150.850, "dur": 0.910, + "args": { + "External id": 453166,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449686153.145, "dur": 0.832, + "args": { + "External id": 453167,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449686157.806, "dur": 0.592, + "args": { + "External id": 453168,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449686161.799, "dur": 1.950, + "args": { + "External id": 453169,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449686165.018, "dur": 0.880, + "args": { + "External id": 453170,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449686167.309, "dur": 0.607, + "args": { + "External id": 453171,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449686188.136, "dur": 29048.327, + "args": { + "External id": 453172,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449686207.784, "dur": 29020.119, + "args": { + "External id": 453173,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449686228.308, "dur": 17.122, + "args": { + "External id": 453174,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449686249.197, "dur": 28940.657, + "args": { + "External id": 453175,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449686252.015, "dur": 28937.234, + "args": { + "External id": 453176,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449686257.967, "dur": 5.198, + "args": { + "External id": 453177,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449686265.111, "dur": 28920.479, + "args": { + "External id": 453178,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449715424.841, "dur": 31.911, + "args": { + "External id": 453179,"Sequence number": 5036375, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12415 + } + }, + { + "ph": "s", "id": 215, "pid": 2338707, "tid": 2338707, "ts": 6267449715424.841, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449715440.997, "dur": 11.084, + "args": { + "External id": 453180,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449715447.092, "dur": 4.844, + "args": { + "External id": 453181,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449715523.726, "dur": 93.662, + "args": { + "External id": 453182,"Record function id": 0, "Ev Idx": 12418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449715619.905, "dur": 1143.351, + "args": { + "External id": 453183,"Record function id": 0, "Ev Idx": 12419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449715660.470, "dur": 1088.859, + "args": { + "External id": 453184,"Sequence number": 5036376, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12420 + } + }, + { + "ph": "s", "id": 214, "pid": 2338707, "tid": 2338707, "ts": 6267449715660.470, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449715728.930, "dur": 50.052, + "args": { + "External id": 453185,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449715792.299, "dur": 109.502, + "args": { + "External id": 453186,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449715914.113, "dur": 38.503, + "args": { + "External id": 453187,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449715961.990, "dur": 28.848, + "args": { + "External id": 453188,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449716016.439, "dur": 27.121, + "args": { + "External id": 453189,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449716103.373, "dur": 20.646, + "args": { + "External id": 453190,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449716149.676, "dur": 141.225, + "args": { + "External id": 453191,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449716205.040, "dur": 11.693, + "args": { + "External id": 453192,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449716210.286, "dur": 5.613, + "args": { + "External id": 453193,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449716219.090, "dur": 4.988, + "args": { + "External id": 453194,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449716227.378, "dur": 1.175, + "args": { + "External id": 453195,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449716231.036, "dur": 7.102, + "args": { + "External id": 453196,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449716302.516, "dur": 54.890, + "args": { + "External id": 453197,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449716390.112, "dur": 29.323, + "args": { + "External id": 453198,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449716429.038, "dur": 43.590, + "args": { + "External id": 453199,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449716482.294, "dur": 34.489, + "args": { + "External id": 453200,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449716541.638, "dur": 27.819, + "args": { + "External id": 453201,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449716576.969, "dur": 55.523, + "args": { + "External id": 453202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449716654.343, "dur": 18.139, + "args": { + "External id": 453203,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12439 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2338707, "tid": 2338707, + "ts": 6267449716833.069, "dur": 86.848, + "args": { + "External id": 453204,"Record function id": 0, "Ev Idx": 12440 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449716997.853, "dur": 91.692, + "args": { + "External id": 453205,"Record function id": 0, "Ev Idx": 12441 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2338707, "tid": 2338707, + "ts": 6267449717106.552, "dur": 31467.516, + "args": { + "External id": 453206,"Record function id": 0, "Ev Idx": 12442 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338707, "tid": 2338707, + "ts": 6267449717117.229, "dur": 984.119, + "args": { + "External id": 453207,"Record function id": 0, "Ev Idx": 12443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449717207.824, "dur": 9.895, + "args": { + "External id": 453208,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449717231.282, "dur": 42.546, + "args": { + "External id": 453209,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717239.077, "dur": 2.187, + "args": { + "External id": 453210,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717245.328, "dur": 0.369, + "args": { + "External id": 453211,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717249.127, "dur": 0.374, + "args": { + "External id": 453212,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717250.400, "dur": 0.683, + "args": { + "External id": 453213,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717253.728, "dur": 0.477, + "args": { + "External id": 453214,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717256.832, "dur": 0.439, + "args": { + "External id": 453215,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717258.176, "dur": 4.221, + "args": { + "External id": 453216,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717263.050, "dur": 0.341, + "args": { + "External id": 453217,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717266.511, "dur": 0.267, + "args": { + "External id": 453218,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449717286.088, "dur": 58.483, + "args": { + "External id": 453219,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449717379.246, "dur": 141.116, + "args": { + "External id": 453220,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449717393.422, "dur": 3.877, + "args": { + "External id": 453221,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449717402.718, "dur": 10.630, + "args": { + "External id": 453222,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449717407.539, "dur": 5.378, + "args": { + "External id": 453223,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717411.024, "dur": 0.598, + "args": { + "External id": 453224,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449717419.502, "dur": 37.677, + "args": { + "External id": 453225,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717423.791, "dur": 3.156, + "args": { + "External id": 453226,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717427.864, "dur": 0.476, + "args": { + "External id": 453227,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717429.476, "dur": 0.523, + "args": { + "External id": 453228,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717435.858, "dur": 2.650, + "args": { + "External id": 453229,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717439.519, "dur": 0.461, + "args": { + "External id": 453230,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717440.530, "dur": 0.267, + "args": { + "External id": 453231,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717444.800, "dur": 0.375, + "args": { + "External id": 453232,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717445.634, "dur": 0.377, + "args": { + "External id": 453233,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449717448.657, "dur": 3.018, + "args": { + "External id": 453234,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449717473.802, "dur": 38.296, + "args": { + "External id": 453235,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449717578.784, "dur": 386.280, + "args": { + "External id": 453236,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449717629.285, "dur": 330.265, + "args": { + "External id": 453237,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12473, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449717641.597, "dur": 312.062, + "args": { + "External id": 453238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449717988.526, "dur": 2.508, + "args": { + "External id": 453239,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12475, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338707, "tid": 2338707, + "ts": 6267449718125.526, "dur": 30244.452, + "args": { + "External id": 453240,"Record function id": 0, "Ev Idx": 12476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449718239.440, "dur": 7.450, + "args": { + "External id": 453241,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449718250.469, "dur": 1.031, + "args": { + "External id": 453242,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449718253.178, "dur": 3.439, + "args": { + "External id": 453243,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449718258.442, "dur": 0.748, + "args": { + "External id": 453244,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449718260.433, "dur": 0.900, + "args": { + "External id": 453245,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449718265.348, "dur": 0.680, + "args": { + "External id": 453246,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449718269.288, "dur": 1.084, + "args": { + "External id": 453247,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449718271.679, "dur": 3.401, + "args": { + "External id": 453248,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449718276.343, "dur": 0.635, + "args": { + "External id": 453249,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449718280.437, "dur": 0.934, + "args": { + "External id": 453250,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449718299.389, "dur": 30025.682, + "args": { + "External id": 453251,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449718325.334, "dur": 29991.431, + "args": { + "External id": 453252,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449718342.585, "dur": 17.592, + "args": { + "External id": 453253,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449718363.837, "dur": 29918.851, + "args": { + "External id": 453254,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449718366.486, "dur": 29915.625, + "args": { + "External id": 453255,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449718372.640, "dur": 5.140, + "args": { + "External id": 453256,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449718379.707, "dur": 29899.394, + "args": { + "External id": 453257,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449748511.402, "dur": 31.921, + "args": { + "External id": 453258,"Sequence number": 5036377, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12494 + } + }, + { + "ph": "s", "id": 213, "pid": 2338707, "tid": 2338707, "ts": 6267449748511.402, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449748527.791, "dur": 10.613, + "args": { + "External id": 453259,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449748533.491, "dur": 4.724, + "args": { + "External id": 453260,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449748634.022, "dur": 80.878, + "args": { + "External id": 453261,"Record function id": 0, "Ev Idx": 12497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449748716.541, "dur": 1167.150, + "args": { + "External id": 453262,"Record function id": 0, "Ev Idx": 12498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449748759.766, "dur": 1110.360, + "args": { + "External id": 453263,"Sequence number": 5036378, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12499 + } + }, + { + "ph": "s", "id": 212, "pid": 2338707, "tid": 2338707, "ts": 6267449748759.766, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449748828.178, "dur": 52.426, + "args": { + "External id": 453264,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449748893.847, "dur": 110.669, + "args": { + "External id": 453265,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449749016.461, "dur": 74.761, + "args": { + "External id": 453266,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449749105.807, "dur": 34.847, + "args": { + "External id": 453267,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449749171.526, "dur": 29.302, + "args": { + "External id": 453268,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449749219.542, "dur": 16.606, + "args": { + "External id": 453269,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449749265.119, "dur": 145.523, + "args": { + "External id": 453270,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449749321.641, "dur": 11.178, + "args": { + "External id": 453271,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449749326.731, "dur": 5.249, + "args": { + "External id": 453272,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449749336.918, "dur": 5.909, + "args": { + "External id": 453273,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449749344.129, "dur": 0.901, + "args": { + "External id": 453274,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449749347.467, "dur": 4.375, + "args": { + "External id": 453275,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449749422.004, "dur": 52.048, + "args": { + "External id": 453276,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449749511.329, "dur": 30.988, + "args": { + "External id": 453277,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449749552.527, "dur": 59.369, + "args": { + "External id": 453278,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449749623.777, "dur": 39.543, + "args": { + "External id": 453279,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449749686.839, "dur": 27.907, + "args": { + "External id": 453280,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449749723.024, "dur": 36.870, + "args": { + "External id": 453281,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449749779.019, "dur": 17.374, + "args": { + "External id": 453282,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12518 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2338707, "tid": 2338707, + "ts": 6267449749950.332, "dur": 82.999, + "args": { + "External id": 453283,"Record function id": 0, "Ev Idx": 12519 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449750152.624, "dur": 50.494, + "args": { + "External id": 453284,"Record function id": 0, "Ev Idx": 12520 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2338707, "tid": 2338707, + "ts": 6267449750212.902, "dur": 29819.900, + "args": { + "External id": 453285,"Record function id": 0, "Ev Idx": 12521 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338707, "tid": 2338707, + "ts": 6267449750222.448, "dur": 1000.147, + "args": { + "External id": 453286,"Record function id": 0, "Ev Idx": 12522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449750306.632, "dur": 9.651, + "args": { + "External id": 453287,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449750329.029, "dur": 42.572, + "args": { + "External id": 453288,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750334.474, "dur": 2.508, + "args": { + "External id": 453289,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750341.038, "dur": 0.478, + "args": { + "External id": 453290,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750344.597, "dur": 0.356, + "args": { + "External id": 453291,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750345.495, "dur": 0.514, + "args": { + "External id": 453292,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750351.433, "dur": 0.310, + "args": { + "External id": 453293,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750352.414, "dur": 0.417, + "args": { + "External id": 453294,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750353.544, "dur": 4.912, + "args": { + "External id": 453295,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750361.420, "dur": 0.420, + "args": { + "External id": 453296,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750362.330, "dur": 0.529, + "args": { + "External id": 453297,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449750386.842, "dur": 62.243, + "args": { + "External id": 453298,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449750483.605, "dur": 156.104, + "args": { + "External id": 453299,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449750495.863, "dur": 7.396, + "args": { + "External id": 453300,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449750508.503, "dur": 10.404, + "args": { + "External id": 453301,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449750513.276, "dur": 5.247, + "args": { + "External id": 453302,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750516.605, "dur": 0.570, + "args": { + "External id": 453303,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449750525.139, "dur": 40.226, + "args": { + "External id": 453304,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750528.848, "dur": 2.420, + "args": { + "External id": 453305,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750532.175, "dur": 0.585, + "args": { + "External id": 453306,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750535.579, "dur": 0.316, + "args": { + "External id": 453307,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750538.821, "dur": 2.210, + "args": { + "External id": 453308,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750541.779, "dur": 0.286, + "args": { + "External id": 453309,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750544.740, "dur": 0.563, + "args": { + "External id": 453310,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750548.247, "dur": 0.285, + "args": { + "External id": 453311,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750549.177, "dur": 0.303, + "args": { + "External id": 453312,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449750552.336, "dur": 5.946, + "args": { + "External id": 453313,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449750579.133, "dur": 51.190, + "args": { + "External id": 453314,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449750698.493, "dur": 415.261, + "args": { + "External id": 453315,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449750732.371, "dur": 374.692, + "args": { + "External id": 453316,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12552, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449750742.139, "dur": 357.455, + "args": { + "External id": 453317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449751142.332, "dur": 2.667, + "args": { + "External id": 453318,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12554, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338707, "tid": 2338707, + "ts": 6267449751244.243, "dur": 28581.594, + "args": { + "External id": 453319,"Record function id": 0, "Ev Idx": 12555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449751353.941, "dur": 7.014, + "args": { + "External id": 453320,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449751364.387, "dur": 1.294, + "args": { + "External id": 453321,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449751367.495, "dur": 3.611, + "args": { + "External id": 453322,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449751372.908, "dur": 0.928, + "args": { + "External id": 453323,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449751375.375, "dur": 1.162, + "args": { + "External id": 453324,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449751379.768, "dur": 1.037, + "args": { + "External id": 453325,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449751382.451, "dur": 1.024, + "args": { + "External id": 453326,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449751385.032, "dur": 3.403, + "args": { + "External id": 453327,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449751389.843, "dur": 0.974, + "args": { + "External id": 453328,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449751394.286, "dur": 1.063, + "args": { + "External id": 453329,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449751414.145, "dur": 28367.820, + "args": { + "External id": 453330,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449751430.996, "dur": 28342.600, + "args": { + "External id": 453331,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449751450.050, "dur": 17.395, + "args": { + "External id": 453332,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449751471.069, "dur": 28267.169, + "args": { + "External id": 453333,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449751473.728, "dur": 28263.983, + "args": { + "External id": 453334,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449751482.209, "dur": 7.806, + "args": { + "External id": 453335,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449751491.889, "dur": 28242.380, + "args": { + "External id": 453336,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449779971.603, "dur": 30.299, + "args": { + "External id": 453337,"Sequence number": 5036379, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12573 + } + }, + { + "ph": "s", "id": 211, "pid": 2338707, "tid": 2338707, "ts": 6267449779971.603, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449779988.260, "dur": 8.772, + "args": { + "External id": 453338,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449779991.820, "dur": 4.954, + "args": { + "External id": 453339,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449780108.018, "dur": 76.320, + "args": { + "External id": 453340,"Record function id": 0, "Ev Idx": 12576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449780186.057, "dur": 1144.950, + "args": { + "External id": 453341,"Record function id": 0, "Ev Idx": 12577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449780227.146, "dur": 1089.707, + "args": { + "External id": 453342,"Sequence number": 5036380, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12578 + } + }, + { + "ph": "s", "id": 210, "pid": 2338707, "tid": 2338707, "ts": 6267449780227.146, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449780300.181, "dur": 51.539, + "args": { + "External id": 453343,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449780365.348, "dur": 104.510, + "args": { + "External id": 453344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449780481.416, "dur": 37.228, + "args": { + "External id": 453345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449780527.098, "dur": 29.479, + "args": { + "External id": 453346,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449780584.000, "dur": 44.472, + "args": { + "External id": 453347,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449780651.562, "dur": 18.911, + "args": { + "External id": 453348,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449780694.399, "dur": 136.016, + "args": { + "External id": 453349,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449780749.034, "dur": 13.644, + "args": { + "External id": 453350,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449780756.428, "dur": 5.484, + "args": { + "External id": 453351,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449780764.898, "dur": 3.831, + "args": { + "External id": 453352,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449780769.656, "dur": 0.958, + "args": { + "External id": 453353,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449780773.407, "dur": 6.392, + "args": { + "External id": 453354,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449780842.125, "dur": 55.287, + "args": { + "External id": 453355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449780930.372, "dur": 29.346, + "args": { + "External id": 453356,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449780969.434, "dur": 42.719, + "args": { + "External id": 453357,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449781024.069, "dur": 74.831, + "args": { + "External id": 453358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449781126.867, "dur": 29.214, + "args": { + "External id": 453359,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449781165.104, "dur": 40.112, + "args": { + "External id": 453360,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449781224.352, "dur": 17.668, + "args": { + "External id": 453361,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2338707, "tid": 2338707, + "ts": 6267449781401.069, "dur": 83.809, + "args": { + "External id": 453362,"Record function id": 0, "Ev Idx": 12598 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449781563.176, "dur": 70.511, + "args": { + "External id": 453363,"Record function id": 0, "Ev Idx": 12599 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2338707, "tid": 2338707, + "ts": 6267449781644.838, "dur": 31271.259, + "args": { + "External id": 453364,"Record function id": 0, "Ev Idx": 12600 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338707, "tid": 2338707, + "ts": 6267449781657.128, "dur": 1045.326, + "args": { + "External id": 453365,"Record function id": 0, "Ev Idx": 12601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449781747.690, "dur": 10.164, + "args": { + "External id": 453366,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449781775.675, "dur": 39.284, + "args": { + "External id": 453367,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781781.252, "dur": 2.549, + "args": { + "External id": 453368,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781790.081, "dur": 0.399, + "args": { + "External id": 453369,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781791.167, "dur": 0.560, + "args": { + "External id": 453370,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781792.224, "dur": 0.319, + "args": { + "External id": 453371,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781797.432, "dur": 0.414, + "args": { + "External id": 453372,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781798.354, "dur": 0.438, + "args": { + "External id": 453373,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781801.217, "dur": 2.751, + "args": { + "External id": 453374,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781804.561, "dur": 0.507, + "args": { + "External id": 453375,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781805.823, "dur": 0.333, + "args": { + "External id": 453376,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449781827.238, "dur": 63.298, + "args": { + "External id": 453377,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449781924.477, "dur": 174.511, + "args": { + "External id": 453378,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449781937.063, "dur": 3.455, + "args": { + "External id": 453379,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449781949.074, "dur": 10.796, + "args": { + "External id": 453380,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449781954.257, "dur": 5.230, + "args": { + "External id": 453381,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781957.688, "dur": 0.478, + "args": { + "External id": 453382,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449781967.842, "dur": 33.201, + "args": { + "External id": 453383,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781969.442, "dur": 2.258, + "args": { + "External id": 453384,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781972.917, "dur": 0.395, + "args": { + "External id": 453385,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781975.710, "dur": 0.408, + "args": { + "External id": 453386,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781979.301, "dur": 2.162, + "args": { + "External id": 453387,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781983.494, "dur": 0.372, + "args": { + "External id": 453388,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781984.432, "dur": 0.528, + "args": { + "External id": 453389,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781987.908, "dur": 0.261, + "args": { + "External id": 453390,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781990.249, "dur": 0.566, + "args": { + "External id": 453391,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449781991.622, "dur": 2.452, + "args": { + "External id": 453392,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449782016.868, "dur": 70.613, + "args": { + "External id": 453393,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449782157.648, "dur": 414.262, + "args": { + "External id": 453394,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449782192.166, "dur": 374.100, + "args": { + "External id": 453395,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12631, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449782204.208, "dur": 356.327, + "args": { + "External id": 453396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449782614.790, "dur": 3.911, + "args": { + "External id": 453397,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12633, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338707, "tid": 2338707, + "ts": 6267449782724.456, "dur": 29989.100, + "args": { + "External id": 453398,"Record function id": 0, "Ev Idx": 12634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449782848.439, "dur": 7.373, + "args": { + "External id": 453399,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449782859.222, "dur": 1.157, + "args": { + "External id": 453400,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449782862.175, "dur": 3.602, + "args": { + "External id": 453401,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449782870.375, "dur": 0.834, + "args": { + "External id": 453402,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449782872.681, "dur": 1.016, + "args": { + "External id": 453403,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449782874.984, "dur": 1.008, + "args": { + "External id": 453404,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449782877.350, "dur": 1.218, + "args": { + "External id": 453405,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449782883.221, "dur": 2.649, + "args": { + "External id": 453406,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449782887.372, "dur": 1.380, + "args": { + "External id": 453407,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449782890.295, "dur": 0.860, + "args": { + "External id": 453408,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449782911.519, "dur": 29752.927, + "args": { + "External id": 453409,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449782929.502, "dur": 29726.367, + "args": { + "External id": 453410,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449782947.519, "dur": 17.266, + "args": { + "External id": 453411,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449782968.638, "dur": 29645.628, + "args": { + "External id": 453412,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449782971.108, "dur": 29642.498, + "args": { + "External id": 453413,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449782977.290, "dur": 7.475, + "args": { + "External id": 453414,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449782986.606, "dur": 29623.889, + "args": { + "External id": 453415,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449812857.229, "dur": 33.757, + "args": { + "External id": 453416,"Sequence number": 5036381, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12652 + } + }, + { + "ph": "s", "id": 209, "pid": 2338707, "tid": 2338707, "ts": 6267449812857.229, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449812875.983, "dur": 10.612, + "args": { + "External id": 453417,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449812881.467, "dur": 4.962, + "args": { + "External id": 453418,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449812957.913, "dur": 77.671, + "args": { + "External id": 453419,"Record function id": 0, "Ev Idx": 12655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449813036.771, "dur": 1230.862, + "args": { + "External id": 453420,"Record function id": 0, "Ev Idx": 12656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449813111.482, "dur": 1140.817, + "args": { + "External id": 453421,"Sequence number": 5036382, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12657 + } + }, + { + "ph": "s", "id": 208, "pid": 2338707, "tid": 2338707, "ts": 6267449813111.482, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449813184.012, "dur": 53.900, + "args": { + "External id": 453422,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449813251.065, "dur": 109.636, + "args": { + "External id": 453423,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449813374.424, "dur": 37.587, + "args": { + "External id": 453424,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449813421.244, "dur": 28.914, + "args": { + "External id": 453425,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449813473.309, "dur": 28.296, + "args": { + "External id": 453426,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449813526.515, "dur": 18.585, + "args": { + "External id": 453427,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449813570.205, "dur": 172.296, + "args": { + "External id": 453428,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449813642.339, "dur": 12.457, + "args": { + "External id": 453429,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449813647.540, "dur": 6.253, + "args": { + "External id": 453430,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449813657.261, "dur": 4.031, + "args": { + "External id": 453431,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449813673.635, "dur": 1.030, + "args": { + "External id": 453432,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449813679.362, "dur": 7.481, + "args": { + "External id": 453433,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449813754.588, "dur": 53.703, + "args": { + "External id": 453434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449813841.626, "dur": 30.892, + "args": { + "External id": 453435,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449813887.513, "dur": 43.192, + "args": { + "External id": 453436,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449813941.000, "dur": 34.891, + "args": { + "External id": 453437,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449813997.241, "dur": 34.408, + "args": { + "External id": 453438,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449814037.718, "dur": 91.413, + "args": { + "External id": 453439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449814158.034, "dur": 20.485, + "args": { + "External id": 453440,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12676 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2338707, "tid": 2338707, + "ts": 6267449814336.310, "dur": 90.735, + "args": { + "External id": 453441,"Record function id": 0, "Ev Idx": 12677 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449814508.732, "dur": 54.888, + "args": { + "External id": 453442,"Record function id": 0, "Ev Idx": 12678 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2338707, "tid": 2338707, + "ts": 6267449814573.199, "dur": 30122.030, + "args": { + "External id": 453443,"Record function id": 0, "Ev Idx": 12679 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338707, "tid": 2338707, + "ts": 6267449814588.828, "dur": 913.180, + "args": { + "External id": 453444,"Record function id": 0, "Ev Idx": 12680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449814694.269, "dur": 10.164, + "args": { + "External id": 453445,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449814717.861, "dur": 43.007, + "args": { + "External id": 453446,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814725.818, "dur": 2.463, + "args": { + "External id": 453447,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814733.041, "dur": 0.401, + "args": { + "External id": 453448,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814735.773, "dur": 0.311, + "args": { + "External id": 453449,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814737.035, "dur": 0.383, + "args": { + "External id": 453450,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814739.909, "dur": 0.331, + "args": { + "External id": 453451,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814743.202, "dur": 0.288, + "args": { + "External id": 453452,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814744.369, "dur": 3.660, + "args": { + "External id": 453453,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814748.838, "dur": 0.344, + "args": { + "External id": 453454,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814751.766, "dur": 0.311, + "args": { + "External id": 453455,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449814772.603, "dur": 58.456, + "args": { + "External id": 453456,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449814864.895, "dur": 129.163, + "args": { + "External id": 453457,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449814878.953, "dur": 3.605, + "args": { + "External id": 453458,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449814888.290, "dur": 12.710, + "args": { + "External id": 453459,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449814895.471, "dur": 5.140, + "args": { + "External id": 453460,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814898.438, "dur": 0.831, + "args": { + "External id": 453461,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449814907.485, "dur": 33.604, + "args": { + "External id": 453462,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814910.460, "dur": 2.475, + "args": { + "External id": 453463,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814913.603, "dur": 0.646, + "args": { + "External id": 453464,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814914.941, "dur": 0.277, + "args": { + "External id": 453465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814919.855, "dur": 2.709, + "args": { + "External id": 453466,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814923.564, "dur": 0.373, + "args": { + "External id": 453467,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814924.942, "dur": 0.371, + "args": { + "External id": 453468,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814930.438, "dur": 0.285, + "args": { + "External id": 453469,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814931.548, "dur": 0.470, + "args": { + "External id": 453470,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449814934.607, "dur": 2.049, + "args": { + "External id": 453471,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449814952.199, "dur": 33.498, + "args": { + "External id": 453472,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449815047.618, "dur": 355.889, + "args": { + "External id": 453473,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449815120.164, "dur": 278.014, + "args": { + "External id": 453474,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12710, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449815132.264, "dur": 260.721, + "args": { + "External id": 453475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449815425.880, "dur": 2.313, + "args": { + "External id": 453476,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12712, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338707, "tid": 2338707, + "ts": 6267449815523.004, "dur": 28953.246, + "args": { + "External id": 453477,"Record function id": 0, "Ev Idx": 12713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449815652.327, "dur": 6.784, + "args": { + "External id": 453478,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449815662.871, "dur": 0.905, + "args": { + "External id": 453479,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449815665.538, "dur": 3.518, + "args": { + "External id": 453480,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449815670.953, "dur": 0.872, + "args": { + "External id": 453481,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449815673.443, "dur": 0.861, + "args": { + "External id": 453482,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449815675.496, "dur": 0.971, + "args": { + "External id": 453483,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449815681.844, "dur": 0.774, + "args": { + "External id": 453484,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449815683.938, "dur": 2.344, + "args": { + "External id": 453485,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449815687.620, "dur": 0.749, + "args": { + "External id": 453486,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449815689.716, "dur": 0.707, + "args": { + "External id": 453487,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449815721.725, "dur": 28708.875, + "args": { + "External id": 453488,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449815740.417, "dur": 28681.930, + "args": { + "External id": 453489,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449815755.586, "dur": 21.650, + "args": { + "External id": 453490,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449815780.741, "dur": 28606.609, + "args": { + "External id": 453491,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449815783.410, "dur": 28603.409, + "args": { + "External id": 453492,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449815789.024, "dur": 5.534, + "args": { + "External id": 453493,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449815796.218, "dur": 28587.464, + "args": { + "External id": 453494,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449844634.641, "dur": 32.249, + "args": { + "External id": 453495,"Sequence number": 5036383, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12731 + } + }, + { + "ph": "s", "id": 207, "pid": 2338707, "tid": 2338707, "ts": 6267449844634.641, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449844651.518, "dur": 10.389, + "args": { + "External id": 453496,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449844656.660, "dur": 4.927, + "args": { + "External id": 453497,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449844738.475, "dur": 79.266, + "args": { + "External id": 453498,"Record function id": 0, "Ev Idx": 12734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449844819.099, "dur": 1147.555, + "args": { + "External id": 453499,"Record function id": 0, "Ev Idx": 12735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449844859.741, "dur": 1092.834, + "args": { + "External id": 453500,"Sequence number": 5036384, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12736 + } + }, + { + "ph": "s", "id": 206, "pid": 2338707, "tid": 2338707, "ts": 6267449844859.741, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449844926.902, "dur": 49.360, + "args": { + "External id": 453501,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449844989.127, "dur": 136.512, + "args": { + "External id": 453502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449845145.778, "dur": 43.523, + "args": { + "External id": 453503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449845199.436, "dur": 30.606, + "args": { + "External id": 453504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449845258.768, "dur": 30.211, + "args": { + "External id": 453505,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449845310.589, "dur": 19.694, + "args": { + "External id": 453506,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449845354.589, "dur": 134.689, + "args": { + "External id": 453507,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449845407.120, "dur": 11.136, + "args": { + "External id": 453508,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449845412.317, "dur": 5.165, + "args": { + "External id": 453509,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449845421.956, "dur": 4.240, + "args": { + "External id": 453510,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449845427.173, "dur": 1.231, + "args": { + "External id": 453511,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449845430.766, "dur": 5.341, + "args": { + "External id": 453512,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449845500.117, "dur": 53.713, + "args": { + "External id": 453513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449845586.089, "dur": 51.821, + "args": { + "External id": 453514,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449845650.818, "dur": 47.400, + "args": { + "External id": 453515,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449845707.497, "dur": 38.326, + "args": { + "External id": 453516,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449845773.520, "dur": 26.725, + "args": { + "External id": 453517,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449845809.050, "dur": 34.647, + "args": { + "External id": 453518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449845863.651, "dur": 17.603, + "args": { + "External id": 453519,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12755 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2338707, "tid": 2338707, + "ts": 6267449846034.444, "dur": 119.446, + "args": { + "External id": 453520,"Record function id": 0, "Ev Idx": 12756 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449846233.829, "dur": 51.426, + "args": { + "External id": 453521,"Record function id": 0, "Ev Idx": 12757 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2338707, "tid": 2338707, + "ts": 6267449846294.874, "dur": 30014.927, + "args": { + "External id": 453522,"Record function id": 0, "Ev Idx": 12758 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338707, "tid": 2338707, + "ts": 6267449846306.739, "dur": 997.193, + "args": { + "External id": 453523,"Record function id": 0, "Ev Idx": 12759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449846394.746, "dur": 9.889, + "args": { + "External id": 453524,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449846418.386, "dur": 42.627, + "args": { + "External id": 453525,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846424.709, "dur": 2.599, + "args": { + "External id": 453526,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846432.034, "dur": 0.495, + "args": { + "External id": 453527,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846434.211, "dur": 0.756, + "args": { + "External id": 453528,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846437.080, "dur": 0.434, + "args": { + "External id": 453529,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846441.382, "dur": 0.374, + "args": { + "External id": 453530,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846443.100, "dur": 0.670, + "args": { + "External id": 453531,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846445.805, "dur": 4.418, + "args": { + "External id": 453532,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846451.931, "dur": 0.343, + "args": { + "External id": 453533,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846453.552, "dur": 0.527, + "args": { + "External id": 453534,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449846473.239, "dur": 58.231, + "args": { + "External id": 453535,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449846567.200, "dur": 166.548, + "args": { + "External id": 453536,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449846579.949, "dur": 3.651, + "args": { + "External id": 453537,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449846589.113, "dur": 32.140, + "args": { + "External id": 453538,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449846613.483, "dur": 7.351, + "args": { + "External id": 453539,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846618.319, "dur": 0.675, + "args": { + "External id": 453540,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449846629.613, "dur": 37.342, + "args": { + "External id": 453541,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846632.373, "dur": 2.483, + "args": { + "External id": 453542,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846639.906, "dur": 0.367, + "args": { + "External id": 453543,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846642.319, "dur": 0.608, + "args": { + "External id": 453544,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846646.227, "dur": 2.667, + "args": { + "External id": 453545,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846650.552, "dur": 0.335, + "args": { + "External id": 453546,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846652.338, "dur": 1.649, + "args": { + "External id": 453547,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846655.668, "dur": 0.320, + "args": { + "External id": 453548,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846657.354, "dur": 0.378, + "args": { + "External id": 453549,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449846662.041, "dur": 0.264, + "args": { + "External id": 453550,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449846687.125, "dur": 37.828, + "args": { + "External id": 453551,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449846790.467, "dur": 405.483, + "args": { + "External id": 453552,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449846825.016, "dur": 364.616, + "args": { + "External id": 453553,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12789, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449846837.387, "dur": 345.309, + "args": { + "External id": 453554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449847222.370, "dur": 2.547, + "args": { + "External id": 453555,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12791, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338707, "tid": 2338707, + "ts": 6267449847326.279, "dur": 28781.633, + "args": { + "External id": 453556,"Record function id": 0, "Ev Idx": 12792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449847438.929, "dur": 6.776, + "args": { + "External id": 453557,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449847450.018, "dur": 0.848, + "args": { + "External id": 453558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449847452.865, "dur": 4.454, + "args": { + "External id": 453559,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449847459.272, "dur": 0.886, + "args": { + "External id": 453560,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449847462.041, "dur": 0.908, + "args": { + "External id": 453561,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449847464.615, "dur": 0.864, + "args": { + "External id": 453562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449847469.392, "dur": 0.785, + "args": { + "External id": 453563,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449847471.640, "dur": 1.942, + "args": { + "External id": 453564,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449847475.508, "dur": 0.793, + "args": { + "External id": 453565,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449847477.633, "dur": 0.543, + "args": { + "External id": 453566,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449847502.030, "dur": 28539.283, + "args": { + "External id": 453567,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449847518.682, "dur": 28514.385, + "args": { + "External id": 453568,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449847537.512, "dur": 20.227, + "args": { + "External id": 453569,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449847561.590, "dur": 28436.869, + "args": { + "External id": 453570,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449847564.283, "dur": 28433.518, + "args": { + "External id": 453571,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449847570.870, "dur": 6.406, + "args": { + "External id": 453572,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449847579.017, "dur": 28415.694, + "args": { + "External id": 453573,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449876255.230, "dur": 30.646, + "args": { + "External id": 453574,"Sequence number": 5036385, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12810 + } + }, + { + "ph": "s", "id": 205, "pid": 2338707, "tid": 2338707, "ts": 6267449876255.230, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449876271.987, "dur": 8.769, + "args": { + "External id": 453575,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449876275.899, "dur": 4.613, + "args": { + "External id": 453576,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449876351.230, "dur": 78.726, + "args": { + "External id": 453577,"Record function id": 0, "Ev Idx": 12813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449876431.785, "dur": 1149.736, + "args": { + "External id": 453578,"Record function id": 0, "Ev Idx": 12814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449876471.229, "dur": 1095.710, + "args": { + "External id": 453579,"Sequence number": 5036386, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12815 + } + }, + { + "ph": "s", "id": 204, "pid": 2338707, "tid": 2338707, "ts": 6267449876471.229, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449876536.881, "dur": 50.232, + "args": { + "External id": 453580,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449876616.032, "dur": 111.385, + "args": { + "External id": 453581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449876741.468, "dur": 38.044, + "args": { + "External id": 453582,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449876787.872, "dur": 29.947, + "args": { + "External id": 453583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449876846.090, "dur": 28.720, + "args": { + "External id": 453584,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449876895.326, "dur": 16.702, + "args": { + "External id": 453585,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449876935.281, "dur": 179.253, + "args": { + "External id": 453586,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449876993.970, "dur": 12.070, + "args": { + "External id": 453587,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449876999.577, "dur": 5.510, + "args": { + "External id": 453588,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449877008.827, "dur": 5.104, + "args": { + "External id": 453589,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449877015.136, "dur": 0.945, + "args": { + "External id": 453590,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449877018.692, "dur": 5.638, + "args": { + "External id": 453591,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449877128.499, "dur": 59.587, + "args": { + "External id": 453592,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449877225.584, "dur": 31.848, + "args": { + "External id": 453593,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449877274.374, "dur": 44.462, + "args": { + "External id": 453594,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449877327.874, "dur": 34.969, + "args": { + "External id": 453595,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449877389.944, "dur": 27.781, + "args": { + "External id": 453596,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449877425.374, "dur": 35.719, + "args": { + "External id": 453597,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449877479.709, "dur": 18.106, + "args": { + "External id": 453598,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12834 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2338707, "tid": 2338707, + "ts": 6267449877667.651, "dur": 81.261, + "args": { + "External id": 453599,"Record function id": 0, "Ev Idx": 12835 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449877826.598, "dur": 50.372, + "args": { + "External id": 453600,"Record function id": 0, "Ev Idx": 12836 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2338707, "tid": 2338707, + "ts": 6267449877885.853, "dur": 31480.233, + "args": { + "External id": 453601,"Record function id": 0, "Ev Idx": 12837 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338707, "tid": 2338707, + "ts": 6267449877895.001, "dur": 1008.119, + "args": { + "External id": 453602,"Record function id": 0, "Ev Idx": 12838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449877985.673, "dur": 9.817, + "args": { + "External id": 453603,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449878009.952, "dur": 40.755, + "args": { + "External id": 453604,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878016.206, "dur": 2.353, + "args": { + "External id": 453605,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878023.864, "dur": 0.456, + "args": { + "External id": 453606,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878025.789, "dur": 0.537, + "args": { + "External id": 453607,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878027.957, "dur": 0.585, + "args": { + "External id": 453608,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878031.903, "dur": 0.410, + "args": { + "External id": 453609,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878034.255, "dur": 0.461, + "args": { + "External id": 453610,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878036.136, "dur": 4.277, + "args": { + "External id": 453611,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878041.607, "dur": 0.383, + "args": { + "External id": 453612,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878043.758, "dur": 0.390, + "args": { + "External id": 453613,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449878098.196, "dur": 55.614, + "args": { + "External id": 453614,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449878192.562, "dur": 132.595, + "args": { + "External id": 453615,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449878207.299, "dur": 5.544, + "args": { + "External id": 453616,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449878218.819, "dur": 14.295, + "args": { + "External id": 453617,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449878226.070, "dur": 6.627, + "args": { + "External id": 453618,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878230.518, "dur": 0.489, + "args": { + "External id": 453619,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449878240.030, "dur": 34.399, + "args": { + "External id": 453620,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878242.701, "dur": 2.729, + "args": { + "External id": 453621,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878246.983, "dur": 0.449, + "args": { + "External id": 453622,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878249.317, "dur": 0.581, + "args": { + "External id": 453623,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878253.443, "dur": 1.920, + "args": { + "External id": 453624,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878257.070, "dur": 0.228, + "args": { + "External id": 453625,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878258.841, "dur": 0.311, + "args": { + "External id": 453626,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878262.782, "dur": 0.244, + "args": { + "External id": 453627,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878265.139, "dur": 0.275, + "args": { + "External id": 453628,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449878266.810, "dur": 2.496, + "args": { + "External id": 453629,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449878285.769, "dur": 31.579, + "args": { + "External id": 453630,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449878382.929, "dur": 414.474, + "args": { + "External id": 453631,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449878417.711, "dur": 373.373, + "args": { + "External id": 453632,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12868, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449878459.303, "dur": 325.662, + "args": { + "External id": 453633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449878823.712, "dur": 2.260, + "args": { + "External id": 453634,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12870, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338707, "tid": 2338707, + "ts": 6267449878924.362, "dur": 30240.104, + "args": { + "External id": 453635,"Record function id": 0, "Ev Idx": 12871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449879029.979, "dur": 7.063, + "args": { + "External id": 453636,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449879041.537, "dur": 1.305, + "args": { + "External id": 453637,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449879044.812, "dur": 3.134, + "args": { + "External id": 453638,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449879049.868, "dur": 0.910, + "args": { + "External id": 453639,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449879086.564, "dur": 1.669, + "args": { + "External id": 453640,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449879091.944, "dur": 1.048, + "args": { + "External id": 453641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449879097.031, "dur": 1.033, + "args": { + "External id": 453642,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449879099.568, "dur": 2.171, + "args": { + "External id": 453643,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449879103.303, "dur": 0.651, + "args": { + "External id": 453644,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449879105.610, "dur": 0.964, + "args": { + "External id": 453645,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449879128.553, "dur": 29988.826, + "args": { + "External id": 453646,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449879146.428, "dur": 29962.537, + "args": { + "External id": 453647,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449879166.095, "dur": 16.037, + "args": { + "External id": 453648,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449879185.856, "dur": 29862.210, + "args": { + "External id": 453649,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449879188.811, "dur": 29858.673, + "args": { + "External id": 453650,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449879195.286, "dur": 6.090, + "args": { + "External id": 453651,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449879203.114, "dur": 29841.317, + "args": { + "External id": 453652,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449909311.755, "dur": 29.007, + "args": { + "External id": 453653,"Sequence number": 5036387, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12889 + } + }, + { + "ph": "s", "id": 203, "pid": 2338707, "tid": 2338707, "ts": 6267449909311.755, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449909327.201, "dur": 8.838, + "args": { + "External id": 453654,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449909331.191, "dur": 4.595, + "args": { + "External id": 453655,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449909407.926, "dur": 75.080, + "args": { + "External id": 453656,"Record function id": 0, "Ev Idx": 12892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449909485.024, "dur": 1140.165, + "args": { + "External id": 453657,"Record function id": 0, "Ev Idx": 12893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449909526.200, "dur": 1083.920, + "args": { + "External id": 453658,"Sequence number": 5036388, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12894 + } + }, + { + "ph": "s", "id": 202, "pid": 2338707, "tid": 2338707, "ts": 6267449909526.200, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449909610.713, "dur": 52.262, + "args": { + "External id": 453659,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449909677.833, "dur": 108.985, + "args": { + "External id": 453660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449909800.495, "dur": 36.830, + "args": { + "External id": 453661,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449909845.079, "dur": 29.449, + "args": { + "External id": 453662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449909902.012, "dur": 25.563, + "args": { + "External id": 453663,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449909947.331, "dur": 16.685, + "args": { + "External id": 453664,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449909987.812, "dur": 176.936, + "args": { + "External id": 453665,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449910040.276, "dur": 11.886, + "args": { + "External id": 453666,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449910046.461, "dur": 4.906, + "args": { + "External id": 453667,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449910091.028, "dur": 6.533, + "args": { + "External id": 453668,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449910099.121, "dur": 0.836, + "args": { + "External id": 453669,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449910102.804, "dur": 5.166, + "args": { + "External id": 453670,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449910177.979, "dur": 54.383, + "args": { + "External id": 453671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449910266.313, "dur": 29.675, + "args": { + "External id": 453672,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449910306.119, "dur": 42.143, + "args": { + "External id": 453673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449910356.916, "dur": 35.391, + "args": { + "External id": 453674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449910417.145, "dur": 25.036, + "args": { + "External id": 453675,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449910449.767, "dur": 34.642, + "args": { + "External id": 453676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449910501.858, "dur": 17.649, + "args": { + "External id": 453677,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12913 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2338707, "tid": 2338707, + "ts": 6267449910698.743, "dur": 85.860, + "args": { + "External id": 453678,"Record function id": 0, "Ev Idx": 12914 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449910863.376, "dur": 48.433, + "args": { + "External id": 453679,"Record function id": 0, "Ev Idx": 12915 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2338707, "tid": 2338707, + "ts": 6267449910920.855, "dur": 31544.468, + "args": { + "External id": 453680,"Record function id": 0, "Ev Idx": 12916 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338707, "tid": 2338707, + "ts": 6267449910930.060, "dur": 1000.960, + "args": { + "External id": 453681,"Record function id": 0, "Ev Idx": 12917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449911018.337, "dur": 9.019, + "args": { + "External id": 453682,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449911040.963, "dur": 77.594, + "args": { + "External id": 453683,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911047.005, "dur": 2.567, + "args": { + "External id": 453684,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911089.489, "dur": 1.027, + "args": { + "External id": 453685,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911093.385, "dur": 0.395, + "args": { + "External id": 453686,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911095.428, "dur": 0.468, + "args": { + "External id": 453687,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911099.550, "dur": 0.638, + "args": { + "External id": 453688,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911101.730, "dur": 0.454, + "args": { + "External id": 453689,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911103.511, "dur": 4.132, + "args": { + "External id": 453690,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911109.598, "dur": 0.336, + "args": { + "External id": 453691,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911111.363, "dur": 0.328, + "args": { + "External id": 453692,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449911135.809, "dur": 58.623, + "args": { + "External id": 453693,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449911233.223, "dur": 136.593, + "args": { + "External id": 453694,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449911247.220, "dur": 6.804, + "args": { + "External id": 453695,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449911259.514, "dur": 13.410, + "args": { + "External id": 453696,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449911266.601, "dur": 5.888, + "args": { + "External id": 453697,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911270.862, "dur": 0.452, + "args": { + "External id": 453698,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449911280.021, "dur": 30.611, + "args": { + "External id": 453699,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911282.273, "dur": 0.608, + "args": { + "External id": 453700,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911284.488, "dur": 2.800, + "args": { + "External id": 453701,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911288.584, "dur": 0.507, + "args": { + "External id": 453702,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911290.703, "dur": 2.653, + "args": { + "External id": 453703,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911296.835, "dur": 0.694, + "args": { + "External id": 453704,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911298.834, "dur": 0.382, + "args": { + "External id": 453705,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911300.660, "dur": 0.359, + "args": { + "External id": 453706,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911303.939, "dur": 0.290, + "args": { + "External id": 453707,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449911305.875, "dur": 0.570, + "args": { + "External id": 453708,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449911324.066, "dur": 37.245, + "args": { + "External id": 453709,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449911427.588, "dur": 400.568, + "args": { + "External id": 453710,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449911463.176, "dur": 359.048, + "args": { + "External id": 453711,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12947, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449911474.467, "dur": 341.194, + "args": { + "External id": 453712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449911854.643, "dur": 2.557, + "args": { + "External id": 453713,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12949, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338707, "tid": 2338707, + "ts": 6267449911952.965, "dur": 30307.178, + "args": { + "External id": 453714,"Record function id": 0, "Ev Idx": 12950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449912089.488, "dur": 7.405, + "args": { + "External id": 453715,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449912102.833, "dur": 0.786, + "args": { + "External id": 453716,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449912105.743, "dur": 3.733, + "args": { + "External id": 453717,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449912111.871, "dur": 0.978, + "args": { + "External id": 453718,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449912114.565, "dur": 1.128, + "args": { + "External id": 453719,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449912117.273, "dur": 1.053, + "args": { + "External id": 453720,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449912122.613, "dur": 0.914, + "args": { + "External id": 453721,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449912124.911, "dur": 2.314, + "args": { + "External id": 453722,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449912128.611, "dur": 0.698, + "args": { + "External id": 453723,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449912131.141, "dur": 0.847, + "args": { + "External id": 453724,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449912153.726, "dur": 30059.244, + "args": { + "External id": 453725,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449912170.147, "dur": 30034.228, + "args": { + "External id": 453726,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449912192.602, "dur": 19.032, + "args": { + "External id": 453727,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449912215.245, "dur": 29953.338, + "args": { + "External id": 453728,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449912218.307, "dur": 29949.632, + "args": { + "External id": 453729,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449912224.659, "dur": 7.606, + "args": { + "External id": 453730,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449912234.154, "dur": 29930.449, + "args": { + "External id": 453731,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449942407.311, "dur": 32.328, + "args": { + "External id": 453732,"Sequence number": 5036389, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12968 + } + }, + { + "ph": "s", "id": 201, "pid": 2338707, "tid": 2338707, "ts": 6267449942407.311, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449942425.270, "dur": 9.157, + "args": { + "External id": 453733,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449942429.333, "dur": 4.914, + "args": { + "External id": 453734,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449942510.859, "dur": 78.792, + "args": { + "External id": 453735,"Record function id": 0, "Ev Idx": 12971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449942591.121, "dur": 1164.254, + "args": { + "External id": 453736,"Record function id": 0, "Ev Idx": 12972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449942647.234, "dur": 1094.334, + "args": { + "External id": 453737,"Sequence number": 5036390, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12973 + } + }, + { + "ph": "s", "id": 200, "pid": 2338707, "tid": 2338707, "ts": 6267449942647.234, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449942715.886, "dur": 50.145, + "args": { + "External id": 453738,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449942779.328, "dur": 106.251, + "args": { + "External id": 453739,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449942898.894, "dur": 40.853, + "args": { + "External id": 453740,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449942948.684, "dur": 29.694, + "args": { + "External id": 453741,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449943005.847, "dur": 27.445, + "args": { + "External id": 453742,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449943088.569, "dur": 25.301, + "args": { + "External id": 453743,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449943140.001, "dur": 136.940, + "args": { + "External id": 453744,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449943194.070, "dur": 12.894, + "args": { + "External id": 453745,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449943200.372, "dur": 5.634, + "args": { + "External id": 453746,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449943209.708, "dur": 3.882, + "args": { + "External id": 453747,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449943214.956, "dur": 1.070, + "args": { + "External id": 453748,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449943219.068, "dur": 6.807, + "args": { + "External id": 453749,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449943287.634, "dur": 54.793, + "args": { + "External id": 453750,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449943376.581, "dur": 32.603, + "args": { + "External id": 453751,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449943420.368, "dur": 43.861, + "args": { + "External id": 453752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449943474.056, "dur": 35.467, + "args": { + "External id": 453753,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449943530.562, "dur": 27.847, + "args": { + "External id": 453754,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449943567.898, "dur": 52.060, + "args": { + "External id": 453755,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449943644.398, "dur": 21.055, + "args": { + "External id": 453756,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12992 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2338707, "tid": 2338707, + "ts": 6267449943822.221, "dur": 80.585, + "args": { + "External id": 453757,"Record function id": 0, "Ev Idx": 12993 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449943978.766, "dur": 47.638, + "args": { + "External id": 453758,"Record function id": 0, "Ev Idx": 12994 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2338707, "tid": 2338707, + "ts": 6267449944035.924, "dur": 30933.029, + "args": { + "External id": 453759,"Record function id": 0, "Ev Idx": 12995 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338707, "tid": 2338707, + "ts": 6267449944044.504, "dur": 989.043, + "args": { + "External id": 453760,"Record function id": 0, "Ev Idx": 12996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449944174.468, "dur": 9.742, + "args": { + "External id": 453761,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449944200.866, "dur": 43.203, + "args": { + "External id": 453762,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944207.041, "dur": 2.438, + "args": { + "External id": 453763,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944213.565, "dur": 0.593, + "args": { + "External id": 453764,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944215.297, "dur": 0.594, + "args": { + "External id": 453765,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944220.489, "dur": 2.357, + "args": { + "External id": 453766,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944224.519, "dur": 0.696, + "args": { + "External id": 453767,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944226.944, "dur": 0.517, + "args": { + "External id": 453768,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944231.546, "dur": 2.767, + "args": { + "External id": 453769,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944235.328, "dur": 0.520, + "args": { + "External id": 453770,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944237.001, "dur": 0.256, + "args": { + "External id": 453771,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449944256.330, "dur": 62.713, + "args": { + "External id": 453772,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449944358.758, "dur": 126.213, + "args": { + "External id": 453773,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449944371.619, "dur": 3.890, + "args": { + "External id": 453774,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449944380.919, "dur": 15.616, + "args": { + "External id": 453775,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449944388.397, "dur": 7.669, + "args": { + "External id": 453776,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944392.286, "dur": 2.404, + "args": { + "External id": 453777,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449944403.327, "dur": 29.497, + "args": { + "External id": 453778,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944405.383, "dur": 0.236, + "args": { + "External id": 453779,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944406.990, "dur": 0.672, + "args": { + "External id": 453780,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944408.956, "dur": 0.421, + "args": { + "External id": 453781,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944412.957, "dur": 2.352, + "args": { + "External id": 453782,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944416.498, "dur": 0.684, + "args": { + "External id": 453783,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944418.644, "dur": 2.622, + "args": { + "External id": 453784,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944422.428, "dur": 0.531, + "args": { + "External id": 453785,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944424.066, "dur": 0.583, + "args": { + "External id": 453786,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449944428.132, "dur": 0.249, + "args": { + "External id": 453787,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449944444.062, "dur": 32.607, + "args": { + "External id": 453788,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449944540.773, "dur": 388.589, + "args": { + "External id": 453789,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449944574.173, "dur": 348.764, + "args": { + "External id": 453790,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13026, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449944585.505, "dur": 329.220, + "args": { + "External id": 453791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449944955.288, "dur": 2.324, + "args": { + "External id": 453792,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13028, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338707, "tid": 2338707, + "ts": 6267449945090.801, "dur": 29677.609, + "args": { + "External id": 453793,"Record function id": 0, "Ev Idx": 13029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449945205.707, "dur": 7.253, + "args": { + "External id": 453794,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449945217.828, "dur": 1.040, + "args": { + "External id": 453795,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449945220.680, "dur": 3.167, + "args": { + "External id": 453796,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449945225.912, "dur": 0.835, + "args": { + "External id": 453797,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449945228.225, "dur": 0.965, + "args": { + "External id": 453798,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449945230.824, "dur": 1.044, + "args": { + "External id": 453799,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449945235.951, "dur": 1.348, + "args": { + "External id": 453800,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449945238.704, "dur": 1.996, + "args": { + "External id": 453801,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449945242.085, "dur": 1.143, + "args": { + "External id": 453802,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449945244.539, "dur": 0.469, + "args": { + "External id": 453803,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449945265.204, "dur": 29458.428, + "args": { + "External id": 453804,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449945281.122, "dur": 29434.444, + "args": { + "External id": 453805,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449945303.154, "dur": 18.433, + "args": { + "External id": 453806,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449945325.191, "dur": 29353.133, + "args": { + "External id": 453807,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449945328.000, "dur": 29349.727, + "args": { + "External id": 453808,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449945334.190, "dur": 6.721, + "args": { + "External id": 453809,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449945342.730, "dur": 29331.883, + "args": { + "External id": 453810,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449974913.724, "dur": 30.062, + "args": { + "External id": 453811,"Sequence number": 5036391, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13047 + } + }, + { + "ph": "s", "id": 199, "pid": 2338707, "tid": 2338707, "ts": 6267449974913.724, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267449974930.485, "dur": 8.279, + "args": { + "External id": 453812,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449974933.949, "dur": 4.664, + "args": { + "External id": 453813,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267449975011.511, "dur": 106.936, + "args": { + "External id": 453814,"Record function id": 0, "Ev Idx": 13050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267449975121.545, "dur": 1154.795, + "args": { + "External id": 453815,"Record function id": 0, "Ev Idx": 13051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267449975166.847, "dur": 1094.654, + "args": { + "External id": 453816,"Sequence number": 5036392, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13052 + } + }, + { + "ph": "s", "id": 198, "pid": 2338707, "tid": 2338707, "ts": 6267449975166.847, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449975235.226, "dur": 52.174, + "args": { + "External id": 453817,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449975300.220, "dur": 107.644, + "args": { + "External id": 453818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449975420.975, "dur": 38.986, + "args": { + "External id": 453819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449975469.378, "dur": 29.510, + "args": { + "External id": 453820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449975524.102, "dur": 28.381, + "args": { + "External id": 453821,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267449975572.960, "dur": 36.179, + "args": { + "External id": 453822,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267449975636.288, "dur": 139.602, + "args": { + "External id": 453823,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267449975690.924, "dur": 12.214, + "args": { + "External id": 453824,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449975696.405, "dur": 5.913, + "args": { + "External id": 453825,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449975705.955, "dur": 5.289, + "args": { + "External id": 453826,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449975712.522, "dur": 0.931, + "args": { + "External id": 453827,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449975715.982, "dur": 4.974, + "args": { + "External id": 453828,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449975786.393, "dur": 54.122, + "args": { + "External id": 453829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267449975874.427, "dur": 29.849, + "args": { + "External id": 453830,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449975913.835, "dur": 43.550, + "args": { + "External id": 453831,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449975965.840, "dur": 34.610, + "args": { + "External id": 453832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267449976032.096, "dur": 65.053, + "args": { + "External id": 453833,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267449976108.551, "dur": 41.392, + "args": { + "External id": 453834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267449976170.964, "dur": 20.432, + "args": { + "External id": 453835,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13071 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2338707, "tid": 2338707, + "ts": 6267449976345.460, "dur": 79.771, + "args": { + "External id": 453836,"Record function id": 0, "Ev Idx": 13072 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267449976499.378, "dur": 47.189, + "args": { + "External id": 453837,"Record function id": 0, "Ev Idx": 13073 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2338707, "tid": 2338707, + "ts": 6267449976556.597, "dur": 31781.098, + "args": { + "External id": 453838,"Record function id": 0, "Ev Idx": 13074 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338707, "tid": 2338707, + "ts": 6267449976563.715, "dur": 996.784, + "args": { + "External id": 453839,"Record function id": 0, "Ev Idx": 13075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449976668.178, "dur": 10.504, + "args": { + "External id": 453840,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449976693.327, "dur": 37.746, + "args": { + "External id": 453841,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976699.042, "dur": 2.625, + "args": { + "External id": 453842,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976706.065, "dur": 0.368, + "args": { + "External id": 453843,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976707.767, "dur": 0.555, + "args": { + "External id": 453844,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976709.288, "dur": 0.610, + "args": { + "External id": 453845,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976713.323, "dur": 0.570, + "args": { + "External id": 453846,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976715.077, "dur": 0.490, + "args": { + "External id": 453847,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976716.740, "dur": 4.936, + "args": { + "External id": 453848,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976722.727, "dur": 0.544, + "args": { + "External id": 453849,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976724.246, "dur": 0.434, + "args": { + "External id": 453850,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449976743.123, "dur": 57.263, + "args": { + "External id": 453851,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267449976838.149, "dur": 123.383, + "args": { + "External id": 453852,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449976851.596, "dur": 3.647, + "args": { + "External id": 453853,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267449976860.280, "dur": 11.071, + "args": { + "External id": 453854,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267449976865.225, "dur": 5.702, + "args": { + "External id": 453855,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976868.972, "dur": 0.741, + "args": { + "External id": 453856,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267449976878.267, "dur": 28.016, + "args": { + "External id": 453857,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976880.108, "dur": 2.500, + "args": { + "External id": 453858,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976883.936, "dur": 0.304, + "args": { + "External id": 453859,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976885.245, "dur": 0.312, + "args": { + "External id": 453860,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976889.165, "dur": 2.132, + "args": { + "External id": 453861,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976892.264, "dur": 0.274, + "args": { + "External id": 453862,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976893.402, "dur": 0.241, + "args": { + "External id": 453863,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976896.921, "dur": 0.402, + "args": { + "External id": 453864,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976898.231, "dur": 0.569, + "args": { + "External id": 453865,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449976899.598, "dur": 2.672, + "args": { + "External id": 453866,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449976918.967, "dur": 34.776, + "args": { + "External id": 453867,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267449977016.032, "dur": 438.715, + "args": { + "External id": 453868,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449977050.075, "dur": 398.460, + "args": { + "External id": 453869,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13105, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267449977098.333, "dur": 344.753, + "args": { + "External id": 453870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267449977481.575, "dur": 2.668, + "args": { + "External id": 453871,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13107, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338707, "tid": 2338707, + "ts": 6267449977581.736, "dur": 30555.011, + "args": { + "External id": 453872,"Record function id": 0, "Ev Idx": 13108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449977709.827, "dur": 8.005, + "args": { + "External id": 453873,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449977722.971, "dur": 1.039, + "args": { + "External id": 453874,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449977725.754, "dur": 3.441, + "args": { + "External id": 453875,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449977731.132, "dur": 1.034, + "args": { + "External id": 453876,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449977733.820, "dur": 0.914, + "args": { + "External id": 453877,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449977736.213, "dur": 0.655, + "args": { + "External id": 453878,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449977740.723, "dur": 1.210, + "args": { + "External id": 453879,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449977743.445, "dur": 2.154, + "args": { + "External id": 453880,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449977746.927, "dur": 0.887, + "args": { + "External id": 453881,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267449977749.027, "dur": 0.875, + "args": { + "External id": 453882,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449977771.647, "dur": 30319.876, + "args": { + "External id": 453883,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449977787.515, "dur": 30295.255, + "args": { + "External id": 453884,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267449977808.165, "dur": 18.129, + "args": { + "External id": 453885,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267449977829.574, "dur": 30193.840, + "args": { + "External id": 453886,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267449977832.531, "dur": 30190.383, + "args": { + "External id": 453887,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267449977838.772, "dur": 5.482, + "args": { + "External id": 453888,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267449977846.239, "dur": 30173.586, + "args": { + "External id": 453889,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450008282.710, "dur": 29.991, + "args": { + "External id": 453890,"Sequence number": 5036393, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13126 + } + }, + { + "ph": "s", "id": 197, "pid": 2338707, "tid": 2338707, "ts": 6267450008282.710, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450008298.948, "dur": 8.627, + "args": { + "External id": 453891,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450008302.728, "dur": 4.695, + "args": { + "External id": 453892,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450008379.032, "dur": 77.802, + "args": { + "External id": 453893,"Record function id": 0, "Ev Idx": 13129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450008458.255, "dur": 1148.226, + "args": { + "External id": 453894,"Record function id": 0, "Ev Idx": 13130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450008499.637, "dur": 1078.070, + "args": { + "External id": 453895,"Sequence number": 5036394, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13131 + } + }, + { + "ph": "s", "id": 196, "pid": 2338707, "tid": 2338707, "ts": 6267450008499.637, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450008567.115, "dur": 65.936, + "args": { + "External id": 453896,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450008648.305, "dur": 107.798, + "args": { + "External id": 453897,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450008767.795, "dur": 38.970, + "args": { + "External id": 453898,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450008817.316, "dur": 29.376, + "args": { + "External id": 453899,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450008873.475, "dur": 28.001, + "args": { + "External id": 453900,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450008921.056, "dur": 16.047, + "args": { + "External id": 453901,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450008959.272, "dur": 174.209, + "args": { + "External id": 453902,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450009010.732, "dur": 11.864, + "args": { + "External id": 453903,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450009016.220, "dur": 5.719, + "args": { + "External id": 453904,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450009025.179, "dur": 5.029, + "args": { + "External id": 453905,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450009031.631, "dur": 1.158, + "args": { + "External id": 453906,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450009035.223, "dur": 5.592, + "args": { + "External id": 453907,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450009146.603, "dur": 58.775, + "args": { + "External id": 453908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450009239.957, "dur": 32.780, + "args": { + "External id": 453909,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450009284.784, "dur": 43.668, + "args": { + "External id": 453910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450009336.099, "dur": 35.206, + "args": { + "External id": 453911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450009395.668, "dur": 28.091, + "args": { + "External id": 453912,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450009431.689, "dur": 34.924, + "args": { + "External id": 453913,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450009484.365, "dur": 17.720, + "args": { + "External id": 453914,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13150 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2338707, "tid": 2338707, + "ts": 6267450009678.093, "dur": 84.788, + "args": { + "External id": 453915,"Record function id": 0, "Ev Idx": 13151 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267450009840.967, "dur": 47.930, + "args": { + "External id": 453916,"Record function id": 0, "Ev Idx": 13152 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2338707, "tid": 2338707, + "ts": 6267450009898.159, "dur": 30526.698, + "args": { + "External id": 453917,"Record function id": 0, "Ev Idx": 13153 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338707, "tid": 2338707, + "ts": 6267450009906.944, "dur": 981.887, + "args": { + "External id": 453918,"Record function id": 0, "Ev Idx": 13154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450010001.193, "dur": 9.036, + "args": { + "External id": 453919,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450010023.631, "dur": 89.088, + "args": { + "External id": 453920,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010034.470, "dur": 2.250, + "args": { + "External id": 453921,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010040.861, "dur": 0.679, + "args": { + "External id": 453922,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010042.732, "dur": 0.352, + "args": { + "External id": 453923,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010047.627, "dur": 1.944, + "args": { + "External id": 453924,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010050.800, "dur": 0.451, + "args": { + "External id": 453925,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010091.930, "dur": 0.738, + "args": { + "External id": 453926,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010097.713, "dur": 3.560, + "args": { + "External id": 453927,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010102.371, "dur": 0.468, + "args": { + "External id": 453928,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010104.150, "dur": 2.225, + "args": { + "External id": 453929,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450010125.458, "dur": 58.263, + "args": { + "External id": 453930,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267450010221.701, "dur": 121.602, + "args": { + "External id": 453931,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450010235.753, "dur": 5.398, + "args": { + "External id": 453932,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267450010249.471, "dur": 11.028, + "args": { + "External id": 453933,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450010254.578, "dur": 5.503, + "args": { + "External id": 453934,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010258.091, "dur": 0.657, + "args": { + "External id": 453935,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450010267.175, "dur": 25.360, + "args": { + "External id": 453936,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010269.157, "dur": 0.484, + "args": { + "External id": 453937,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010270.723, "dur": 0.355, + "args": { + "External id": 453938,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010272.076, "dur": 2.359, + "args": { + "External id": 453939,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010275.241, "dur": 2.567, + "args": { + "External id": 453940,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010278.741, "dur": 0.475, + "args": { + "External id": 453941,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010282.288, "dur": 0.405, + "args": { + "External id": 453942,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010283.465, "dur": 0.434, + "args": { + "External id": 453943,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010284.695, "dur": 0.453, + "args": { + "External id": 453944,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450010288.267, "dur": 0.404, + "args": { + "External id": 453945,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450010302.692, "dur": 32.590, + "args": { + "External id": 453946,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267450010400.415, "dur": 385.025, + "args": { + "External id": 453947,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450010433.089, "dur": 346.290, + "args": { + "External id": 453948,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13184, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267450010444.541, "dur": 328.294, + "args": { + "External id": 453949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450010812.575, "dur": 2.818, + "args": { + "External id": 453950,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13186, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338707, "tid": 2338707, + "ts": 6267450010910.042, "dur": 29314.514, + "args": { + "External id": 453951,"Record function id": 0, "Ev Idx": 13187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450011015.869, "dur": 6.900, + "args": { + "External id": 453952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450011026.798, "dur": 1.024, + "args": { + "External id": 453953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450011029.543, "dur": 3.288, + "args": { + "External id": 453954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450011034.469, "dur": 0.875, + "args": { + "External id": 453955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450011036.974, "dur": 0.809, + "args": { + "External id": 453956,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450011040.144, "dur": 0.910, + "args": { + "External id": 453957,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450011044.711, "dur": 0.733, + "args": { + "External id": 453958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450011047.100, "dur": 2.007, + "args": { + "External id": 453959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450011050.566, "dur": 0.793, + "args": { + "External id": 453960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450011089.042, "dur": 1.648, + "args": { + "External id": 453961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450011114.186, "dur": 29066.431, + "args": { + "External id": 453962,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450011130.587, "dur": 29041.845, + "args": { + "External id": 453963,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450011149.466, "dur": 16.527, + "args": { + "External id": 453964,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450011169.641, "dur": 28968.241, + "args": { + "External id": 453965,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450011172.613, "dur": 28964.746, + "args": { + "External id": 453966,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450011180.580, "dur": 5.796, + "args": { + "External id": 453967,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450011188.027, "dur": 28945.885, + "args": { + "External id": 453968,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450040368.142, "dur": 31.155, + "args": { + "External id": 453969,"Sequence number": 5036395, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13205 + } + }, + { + "ph": "s", "id": 195, "pid": 2338707, "tid": 2338707, "ts": 6267450040368.142, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450040385.961, "dur": 8.552, + "args": { + "External id": 453970,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450040389.552, "dur": 4.754, + "args": { + "External id": 453971,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450040468.563, "dur": 79.779, + "args": { + "External id": 453972,"Record function id": 0, "Ev Idx": 13208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450040549.593, "dur": 1185.881, + "args": { + "External id": 453973,"Record function id": 0, "Ev Idx": 13209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450040590.239, "dur": 1131.134, + "args": { + "External id": 453974,"Sequence number": 5036396, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13210 + } + }, + { + "ph": "s", "id": 194, "pid": 2338707, "tid": 2338707, "ts": 6267450040590.239, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450040676.995, "dur": 53.730, + "args": { + "External id": 453975,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450040744.635, "dur": 107.697, + "args": { + "External id": 453976,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450040865.598, "dur": 40.116, + "args": { + "External id": 453977,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450040914.741, "dur": 29.539, + "args": { + "External id": 453978,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450040969.535, "dur": 28.391, + "args": { + "External id": 453979,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450041017.011, "dur": 18.756, + "args": { + "External id": 453980,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450041109.013, "dur": 148.722, + "args": { + "External id": 453981,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450041167.923, "dur": 12.632, + "args": { + "External id": 453982,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450041173.255, "dur": 6.121, + "args": { + "External id": 453983,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450041183.067, "dur": 4.214, + "args": { + "External id": 453984,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450041188.542, "dur": 1.269, + "args": { + "External id": 453985,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450041194.472, "dur": 4.934, + "args": { + "External id": 453986,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450041269.870, "dur": 58.212, + "args": { + "External id": 453987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450041363.235, "dur": 33.801, + "args": { + "External id": 453988,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450041409.302, "dur": 44.377, + "args": { + "External id": 453989,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450041460.932, "dur": 34.514, + "args": { + "External id": 453990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450041517.567, "dur": 30.277, + "args": { + "External id": 453991,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450041553.454, "dur": 35.546, + "args": { + "External id": 453992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450041625.553, "dur": 22.281, + "args": { + "External id": 453993,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2338707, "tid": 2338707, + "ts": 6267450041803.171, "dur": 79.763, + "args": { + "External id": 453994,"Record function id": 0, "Ev Idx": 13230 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267450041957.053, "dur": 46.659, + "args": { + "External id": 453995,"Record function id": 0, "Ev Idx": 13231 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2338707, "tid": 2338707, + "ts": 6267450042012.871, "dur": 31802.256, + "args": { + "External id": 453996,"Record function id": 0, "Ev Idx": 13232 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338707, "tid": 2338707, + "ts": 6267450042022.436, "dur": 993.633, + "args": { + "External id": 453997,"Record function id": 0, "Ev Idx": 13233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450042151.027, "dur": 10.265, + "args": { + "External id": 453998,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450042175.849, "dur": 41.056, + "args": { + "External id": 453999,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042181.963, "dur": 2.610, + "args": { + "External id": 454000,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042189.575, "dur": 0.415, + "args": { + "External id": 454001,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042191.711, "dur": 0.710, + "args": { + "External id": 454002,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042194.307, "dur": 0.476, + "args": { + "External id": 454003,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042197.832, "dur": 0.449, + "args": { + "External id": 454004,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042199.891, "dur": 0.403, + "args": { + "External id": 454005,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042201.780, "dur": 4.349, + "args": { + "External id": 454006,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042207.736, "dur": 0.551, + "args": { + "External id": 454007,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042209.909, "dur": 0.582, + "args": { + "External id": 454008,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450042229.338, "dur": 57.905, + "args": { + "External id": 454009,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267450042326.487, "dur": 141.253, + "args": { + "External id": 454010,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450042338.307, "dur": 3.874, + "args": { + "External id": 454011,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267450042347.211, "dur": 11.347, + "args": { + "External id": 454012,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450042352.034, "dur": 6.115, + "args": { + "External id": 454013,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042356.349, "dur": 0.614, + "args": { + "External id": 454014,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450042365.734, "dur": 34.761, + "args": { + "External id": 454015,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042368.261, "dur": 2.382, + "args": { + "External id": 454016,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042372.404, "dur": 0.640, + "args": { + "External id": 454017,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042374.609, "dur": 0.613, + "args": { + "External id": 454018,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042379.203, "dur": 2.290, + "args": { + "External id": 454019,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042383.103, "dur": 0.430, + "args": { + "External id": 454020,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042385.287, "dur": 0.537, + "args": { + "External id": 454021,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042389.239, "dur": 0.688, + "args": { + "External id": 454022,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042391.433, "dur": 0.480, + "args": { + "External id": 454023,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450042393.785, "dur": 2.154, + "args": { + "External id": 454024,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450042421.313, "dur": 38.112, + "args": { + "External id": 454025,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267450042526.073, "dur": 386.072, + "args": { + "External id": 454026,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450042558.826, "dur": 347.707, + "args": { + "External id": 454027,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13263, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267450042571.851, "dur": 328.578, + "args": { + "External id": 454028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450042939.575, "dur": 2.615, + "args": { + "External id": 454029,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13265, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338707, "tid": 2338707, + "ts": 6267450043038.416, "dur": 30565.572, + "args": { + "External id": 454030,"Record function id": 0, "Ev Idx": 13266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450043182.043, "dur": 7.397, + "args": { + "External id": 454031,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450043193.063, "dur": 1.487, + "args": { + "External id": 454032,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450043196.096, "dur": 3.253, + "args": { + "External id": 454033,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450043200.961, "dur": 0.814, + "args": { + "External id": 454034,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450043203.365, "dur": 0.914, + "args": { + "External id": 454035,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450043207.530, "dur": 1.062, + "args": { + "External id": 454036,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450043210.254, "dur": 0.696, + "args": { + "External id": 454037,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450043212.788, "dur": 2.218, + "args": { + "External id": 454038,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450043216.554, "dur": 1.046, + "args": { + "External id": 454039,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450043220.799, "dur": 0.903, + "args": { + "External id": 454040,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450043241.363, "dur": 30305.823, + "args": { + "External id": 454041,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450043258.985, "dur": 30279.952, + "args": { + "External id": 454042,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450043280.257, "dur": 16.491, + "args": { + "External id": 454043,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450043300.634, "dur": 30201.016, + "args": { + "External id": 454044,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450043303.776, "dur": 30197.199, + "args": { + "External id": 454045,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450043309.821, "dur": 5.776, + "args": { + "External id": 454046,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450043317.168, "dur": 30180.701, + "args": { + "External id": 454047,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450073752.469, "dur": 33.383, + "args": { + "External id": 454048,"Sequence number": 5036397, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13284 + } + }, + { + "ph": "s", "id": 193, "pid": 2338707, "tid": 2338707, "ts": 6267450073752.469, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450073772.021, "dur": 9.017, + "args": { + "External id": 454049,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450073775.952, "dur": 4.830, + "args": { + "External id": 454050,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450073856.755, "dur": 79.348, + "args": { + "External id": 454051,"Record function id": 0, "Ev Idx": 13287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450073937.631, "dur": 1159.241, + "args": { + "External id": 454052,"Record function id": 0, "Ev Idx": 13288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450073978.324, "dur": 1070.023, + "args": { + "External id": 454053,"Sequence number": 5036398, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13289 + } + }, + { + "ph": "s", "id": 192, "pid": 2338707, "tid": 2338707, "ts": 6267450073978.324, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450074045.270, "dur": 84.716, + "args": { + "External id": 454054,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450074147.002, "dur": 108.595, + "args": { + "External id": 454055,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450074267.708, "dur": 37.313, + "args": { + "External id": 454056,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450074315.935, "dur": 29.911, + "args": { + "External id": 454057,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450074373.956, "dur": 27.307, + "args": { + "External id": 454058,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450074421.305, "dur": 16.572, + "args": { + "External id": 454059,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450074460.815, "dur": 148.715, + "args": { + "External id": 454060,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450074512.684, "dur": 12.818, + "args": { + "External id": 454061,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450074518.433, "dur": 6.217, + "args": { + "External id": 454062,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450074528.387, "dur": 4.645, + "args": { + "External id": 454063,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450074534.245, "dur": 1.476, + "args": { + "External id": 454064,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450074538.454, "dur": 4.914, + "args": { + "External id": 454065,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450074623.292, "dur": 54.789, + "args": { + "External id": 454066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450074711.993, "dur": 31.559, + "args": { + "External id": 454067,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450074754.769, "dur": 42.295, + "args": { + "External id": 454068,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450074805.761, "dur": 34.817, + "args": { + "External id": 454069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450074865.502, "dur": 27.919, + "args": { + "External id": 454070,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450074901.458, "dur": 35.691, + "args": { + "External id": 454071,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450074956.095, "dur": 19.524, + "args": { + "External id": 454072,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13308 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2338707, "tid": 2338707, + "ts": 6267450075168.322, "dur": 80.786, + "args": { + "External id": 454073,"Record function id": 0, "Ev Idx": 13309 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267450075326.201, "dur": 48.831, + "args": { + "External id": 454074,"Record function id": 0, "Ev Idx": 13310 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2338707, "tid": 2338707, + "ts": 6267450075384.824, "dur": 31556.300, + "args": { + "External id": 454075,"Record function id": 0, "Ev Idx": 13311 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338707, "tid": 2338707, + "ts": 6267450075392.618, "dur": 976.760, + "args": { + "External id": 454076,"Record function id": 0, "Ev Idx": 13312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450075481.213, "dur": 9.344, + "args": { + "External id": 454077,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450075504.411, "dur": 40.799, + "args": { + "External id": 454078,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075510.769, "dur": 2.368, + "args": { + "External id": 454079,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075517.857, "dur": 0.558, + "args": { + "External id": 454080,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075520.033, "dur": 0.581, + "args": { + "External id": 454081,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075522.224, "dur": 0.622, + "args": { + "External id": 454082,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075526.393, "dur": 0.425, + "args": { + "External id": 454083,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075528.073, "dur": 0.622, + "args": { + "External id": 454084,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075530.257, "dur": 4.544, + "args": { + "External id": 454085,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075536.412, "dur": 0.467, + "args": { + "External id": 454086,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075538.157, "dur": 0.564, + "args": { + "External id": 454087,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450075560.027, "dur": 72.330, + "args": { + "External id": 454088,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267450075670.341, "dur": 134.663, + "args": { + "External id": 454089,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450075682.649, "dur": 7.370, + "args": { + "External id": 454090,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267450075697.773, "dur": 12.222, + "args": { + "External id": 454091,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450075702.977, "dur": 6.597, + "args": { + "External id": 454092,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075707.296, "dur": 0.698, + "args": { + "External id": 454093,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450075717.352, "dur": 31.923, + "args": { + "External id": 454094,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075719.998, "dur": 0.316, + "args": { + "External id": 454095,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075721.885, "dur": 2.808, + "args": { + "External id": 454096,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075726.064, "dur": 0.480, + "args": { + "External id": 454097,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075728.155, "dur": 2.152, + "args": { + "External id": 454098,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075734.480, "dur": 0.312, + "args": { + "External id": 454099,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075736.166, "dur": 0.401, + "args": { + "External id": 454100,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075737.814, "dur": 0.343, + "args": { + "External id": 454101,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075741.289, "dur": 0.413, + "args": { + "External id": 454102,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450075743.861, "dur": 0.507, + "args": { + "External id": 454103,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450075761.514, "dur": 35.652, + "args": { + "External id": 454104,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267450075860.813, "dur": 401.051, + "args": { + "External id": 454105,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450075893.947, "dur": 361.990, + "args": { + "External id": 454106,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13342, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267450075905.321, "dur": 344.413, + "args": { + "External id": 454107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450076288.838, "dur": 2.958, + "args": { + "External id": 454108,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13344, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338707, "tid": 2338707, + "ts": 6267450076391.636, "dur": 30346.393, + "args": { + "External id": 454109,"Record function id": 0, "Ev Idx": 13345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450076499.510, "dur": 6.959, + "args": { + "External id": 454110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450076510.301, "dur": 1.019, + "args": { + "External id": 454111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450076512.972, "dur": 3.138, + "args": { + "External id": 454112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450076517.909, "dur": 0.766, + "args": { + "External id": 454113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450076520.219, "dur": 1.018, + "args": { + "External id": 454114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450076522.496, "dur": 0.884, + "args": { + "External id": 454115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450076526.852, "dur": 1.404, + "args": { + "External id": 454116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450076529.802, "dur": 2.108, + "args": { + "External id": 454117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450076533.495, "dur": 0.968, + "args": { + "External id": 454118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450076535.888, "dur": 0.649, + "args": { + "External id": 454119,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450076557.596, "dur": 30137.206, + "args": { + "External id": 454120,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450076573.569, "dur": 30112.882, + "args": { + "External id": 454121,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450076613.691, "dur": 17.465, + "args": { + "External id": 454122,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450076635.218, "dur": 30014.188, + "args": { + "External id": 454123,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450076637.751, "dur": 30011.171, + "args": { + "External id": 454124,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450076644.500, "dur": 6.566, + "args": { + "External id": 454125,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450076653.154, "dur": 29992.608, + "args": { + "External id": 454126,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450106883.102, "dur": 32.327, + "args": { + "External id": 454127,"Sequence number": 5036399, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13363 + } + }, + { + "ph": "s", "id": 191, "pid": 2338707, "tid": 2338707, "ts": 6267450106883.102, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450106901.907, "dur": 8.865, + "args": { + "External id": 454128,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450106905.861, "dur": 4.691, + "args": { + "External id": 454129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450106983.387, "dur": 115.699, + "args": { + "External id": 454130,"Record function id": 0, "Ev Idx": 13366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450107102.094, "dur": 1189.918, + "args": { + "External id": 454131,"Record function id": 0, "Ev Idx": 13367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450107147.017, "dur": 1130.610, + "args": { + "External id": 454132,"Sequence number": 5036400, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13368 + } + }, + { + "ph": "s", "id": 190, "pid": 2338707, "tid": 2338707, "ts": 6267450107147.017, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450107213.734, "dur": 52.091, + "args": { + "External id": 454133,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450107279.184, "dur": 108.953, + "args": { + "External id": 454134,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450107399.841, "dur": 39.399, + "args": { + "External id": 454135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450107450.085, "dur": 29.553, + "args": { + "External id": 454136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450107510.838, "dur": 26.586, + "args": { + "External id": 454137,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450107555.822, "dur": 17.229, + "args": { + "External id": 454138,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450107610.736, "dur": 142.333, + "args": { + "External id": 454139,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450107665.871, "dur": 13.488, + "args": { + "External id": 454140,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450107672.276, "dur": 6.002, + "args": { + "External id": 454141,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450107682.292, "dur": 4.342, + "args": { + "External id": 454142,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450107688.318, "dur": 2.253, + "args": { + "External id": 454143,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450107693.265, "dur": 6.988, + "args": { + "External id": 454144,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450107789.650, "dur": 58.683, + "args": { + "External id": 454145,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450107883.493, "dur": 32.594, + "args": { + "External id": 454146,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450107926.946, "dur": 44.174, + "args": { + "External id": 454147,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450107979.669, "dur": 35.092, + "args": { + "External id": 454148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450108039.631, "dur": 68.161, + "args": { + "External id": 454149,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450108117.412, "dur": 40.454, + "args": { + "External id": 454150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450108182.422, "dur": 20.144, + "args": { + "External id": 454151,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2338707, "tid": 2338707, + "ts": 6267450108361.221, "dur": 86.984, + "args": { + "External id": 454152,"Record function id": 0, "Ev Idx": 13388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267450108525.707, "dur": 46.490, + "args": { + "External id": 454153,"Record function id": 0, "Ev Idx": 13389 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2338707, "tid": 2338707, + "ts": 6267450108581.661, "dur": 30803.934, + "args": { + "External id": 454154,"Record function id": 0, "Ev Idx": 13390 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338707, "tid": 2338707, + "ts": 6267450108589.622, "dur": 956.049, + "args": { + "External id": 454155,"Record function id": 0, "Ev Idx": 13391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450108695.149, "dur": 10.533, + "args": { + "External id": 454156,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450108720.493, "dur": 40.496, + "args": { + "External id": 454157,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108726.678, "dur": 2.450, + "args": { + "External id": 454158,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108733.769, "dur": 0.421, + "args": { + "External id": 454159,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108735.923, "dur": 0.533, + "args": { + "External id": 454160,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108738.308, "dur": 0.367, + "args": { + "External id": 454161,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108742.386, "dur": 0.512, + "args": { + "External id": 454162,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108744.261, "dur": 0.464, + "args": { + "External id": 454163,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108746.407, "dur": 3.935, + "args": { + "External id": 454164,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108751.908, "dur": 0.438, + "args": { + "External id": 454165,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108754.142, "dur": 0.346, + "args": { + "External id": 454166,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450108773.114, "dur": 57.227, + "args": { + "External id": 454167,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267450108867.524, "dur": 129.351, + "args": { + "External id": 454168,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450108879.088, "dur": 4.462, + "args": { + "External id": 454169,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267450108889.239, "dur": 10.814, + "args": { + "External id": 454170,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450108893.916, "dur": 5.715, + "args": { + "External id": 454171,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108897.866, "dur": 0.492, + "args": { + "External id": 454172,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450108906.911, "dur": 31.437, + "args": { + "External id": 454173,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108909.395, "dur": 2.519, + "args": { + "External id": 454174,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108913.299, "dur": 0.385, + "args": { + "External id": 454175,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108915.338, "dur": 0.390, + "args": { + "External id": 454176,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108919.724, "dur": 2.464, + "args": { + "External id": 454177,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108923.734, "dur": 0.514, + "args": { + "External id": 454178,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108925.745, "dur": 0.249, + "args": { + "External id": 454179,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108927.765, "dur": 0.287, + "args": { + "External id": 454180,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108929.834, "dur": 0.392, + "args": { + "External id": 454181,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450108931.842, "dur": 2.456, + "args": { + "External id": 454182,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450108952.744, "dur": 36.007, + "args": { + "External id": 454183,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267450109086.548, "dur": 358.377, + "args": { + "External id": 454184,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450109120.517, "dur": 319.395, + "args": { + "External id": 454185,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13421, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267450109132.717, "dur": 301.913, + "args": { + "External id": 454186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450109470.144, "dur": 2.569, + "args": { + "External id": 454187,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13423, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338707, "tid": 2338707, + "ts": 6267450109567.831, "dur": 29611.157, + "args": { + "External id": 454188,"Record function id": 0, "Ev Idx": 13424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450109693.472, "dur": 7.629, + "args": { + "External id": 454189,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450109704.818, "dur": 0.890, + "args": { + "External id": 454190,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450109707.284, "dur": 3.494, + "args": { + "External id": 454191,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450109712.551, "dur": 1.068, + "args": { + "External id": 454192,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450109714.919, "dur": 1.056, + "args": { + "External id": 454193,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450109717.216, "dur": 0.981, + "args": { + "External id": 454194,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450109721.932, "dur": 0.973, + "args": { + "External id": 454195,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450109725.088, "dur": 2.244, + "args": { + "External id": 454196,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450109728.689, "dur": 0.683, + "args": { + "External id": 454197,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450109730.937, "dur": 0.672, + "args": { + "External id": 454198,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450109753.296, "dur": 29381.319, + "args": { + "External id": 454199,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450109770.077, "dur": 29356.217, + "args": { + "External id": 454200,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450109799.740, "dur": 17.351, + "args": { + "External id": 454201,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450109820.704, "dur": 29268.057, + "args": { + "External id": 454202,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450109823.290, "dur": 29264.903, + "args": { + "External id": 454203,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450109830.057, "dur": 6.229, + "args": { + "External id": 454204,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450109838.164, "dur": 29246.903, + "args": { + "External id": 454205,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450139326.589, "dur": 34.967, + "args": { + "External id": 454206,"Sequence number": 5036401, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13442 + } + }, + { + "ph": "s", "id": 189, "pid": 2338707, "tid": 2338707, "ts": 6267450139326.589, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450139344.193, "dur": 9.189, + "args": { + "External id": 454207,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450139348.266, "dur": 4.915, + "args": { + "External id": 454208,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450139427.165, "dur": 76.369, + "args": { + "External id": 454209,"Record function id": 0, "Ev Idx": 13445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450139504.865, "dur": 1200.303, + "args": { + "External id": 454210,"Record function id": 0, "Ev Idx": 13446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450139548.116, "dur": 1141.823, + "args": { + "External id": 454211,"Sequence number": 5036402, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13447 + } + }, + { + "ph": "s", "id": 188, "pid": 2338707, "tid": 2338707, "ts": 6267450139548.116, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450139629.976, "dur": 53.556, + "args": { + "External id": 454212,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450139699.115, "dur": 107.127, + "args": { + "External id": 454213,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450139820.143, "dur": 38.576, + "args": { + "External id": 454214,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450139869.977, "dur": 29.978, + "args": { + "External id": 454215,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450139927.075, "dur": 28.891, + "args": { + "External id": 454216,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450139976.384, "dur": 19.984, + "args": { + "External id": 454217,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450140019.621, "dur": 192.473, + "args": { + "External id": 454218,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450140123.400, "dur": 14.220, + "args": { + "External id": 454219,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450140129.610, "dur": 6.959, + "args": { + "External id": 454220,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450140140.652, "dur": 4.006, + "args": { + "External id": 454221,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450140146.234, "dur": 1.438, + "args": { + "External id": 454222,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450140150.464, "dur": 5.664, + "args": { + "External id": 454223,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450140225.744, "dur": 61.715, + "args": { + "External id": 454224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450140322.290, "dur": 34.375, + "args": { + "External id": 454225,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450140368.934, "dur": 43.978, + "args": { + "External id": 454226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450140422.212, "dur": 35.238, + "args": { + "External id": 454227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450140482.233, "dur": 29.972, + "args": { + "External id": 454228,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450140520.221, "dur": 35.611, + "args": { + "External id": 454229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450140573.715, "dur": 34.744, + "args": { + "External id": 454230,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13466 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2338707, "tid": 2338707, + "ts": 6267450140773.658, "dur": 89.099, + "args": { + "External id": 454231,"Record function id": 0, "Ev Idx": 13467 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267450140940.525, "dur": 48.699, + "args": { + "External id": 454232,"Record function id": 0, "Ev Idx": 13468 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2338707, "tid": 2338707, + "ts": 6267450140999.228, "dur": 32845.777, + "args": { + "External id": 454233,"Record function id": 0, "Ev Idx": 13469 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338707, "tid": 2338707, + "ts": 6267450141009.144, "dur": 1039.475, + "args": { + "External id": 454234,"Record function id": 0, "Ev Idx": 13470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450141151.813, "dur": 10.368, + "args": { + "External id": 454235,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450141178.145, "dur": 41.790, + "args": { + "External id": 454236,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141184.933, "dur": 2.194, + "args": { + "External id": 454237,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141192.141, "dur": 0.497, + "args": { + "External id": 454238,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141194.010, "dur": 0.327, + "args": { + "External id": 454239,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141196.194, "dur": 0.711, + "args": { + "External id": 454240,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141200.349, "dur": 0.315, + "args": { + "External id": 454241,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141202.367, "dur": 0.590, + "args": { + "External id": 454242,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141205.021, "dur": 4.562, + "args": { + "External id": 454243,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141210.847, "dur": 0.267, + "args": { + "External id": 454244,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141212.983, "dur": 0.434, + "args": { + "External id": 454245,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450141232.500, "dur": 60.193, + "args": { + "External id": 454246,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267450141328.103, "dur": 137.253, + "args": { + "External id": 454247,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450141344.464, "dur": 6.392, + "args": { + "External id": 454248,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267450141358.680, "dur": 11.266, + "args": { + "External id": 454249,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450141363.414, "dur": 5.967, + "args": { + "External id": 454250,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141367.564, "dur": 0.570, + "args": { + "External id": 454251,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450141376.944, "dur": 32.026, + "args": { + "External id": 454252,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141379.825, "dur": 0.509, + "args": { + "External id": 454253,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141382.063, "dur": 2.695, + "args": { + "External id": 454254,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141386.077, "dur": 0.487, + "args": { + "External id": 454255,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141387.961, "dur": 2.577, + "args": { + "External id": 454256,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141394.453, "dur": 0.266, + "args": { + "External id": 454257,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141396.414, "dur": 0.293, + "args": { + "External id": 454258,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141398.233, "dur": 0.411, + "args": { + "External id": 454259,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141401.781, "dur": 0.339, + "args": { + "External id": 454260,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450141403.650, "dur": 0.439, + "args": { + "External id": 454261,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450141420.841, "dur": 36.006, + "args": { + "External id": 454262,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267450141521.341, "dur": 420.501, + "args": { + "External id": 454263,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450141554.735, "dur": 381.167, + "args": { + "External id": 454264,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13500, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267450141565.498, "dur": 363.318, + "args": { + "External id": 454265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450141970.494, "dur": 2.514, + "args": { + "External id": 454266,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13502, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338707, "tid": 2338707, + "ts": 6267450142109.924, "dur": 31526.526, + "args": { + "External id": 454267,"Record function id": 0, "Ev Idx": 13503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450142225.373, "dur": 7.237, + "args": { + "External id": 454268,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450142236.337, "dur": 1.320, + "args": { + "External id": 454269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450142239.346, "dur": 3.261, + "args": { + "External id": 454270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450142244.270, "dur": 1.194, + "args": { + "External id": 454271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450142247.093, "dur": 1.199, + "args": { + "External id": 454272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450142249.666, "dur": 0.979, + "args": { + "External id": 454273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450142254.089, "dur": 0.995, + "args": { + "External id": 454274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450142256.706, "dur": 2.617, + "args": { + "External id": 454275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450142260.696, "dur": 0.748, + "args": { + "External id": 454276,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450142263.383, "dur": 0.995, + "args": { + "External id": 454277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450142285.973, "dur": 31292.655, + "args": { + "External id": 454278,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450142302.473, "dur": 31268.011, + "args": { + "External id": 454279,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450142324.925, "dur": 18.759, + "args": { + "External id": 454280,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450142347.325, "dur": 31185.908, + "args": { + "External id": 454281,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450142349.994, "dur": 31182.583, + "args": { + "External id": 454282,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450142356.112, "dur": 5.870, + "args": { + "External id": 454283,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450142363.778, "dur": 31165.540, + "args": { + "External id": 454284,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450173787.142, "dur": 32.926, + "args": { + "External id": 454285,"Sequence number": 5036403, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13521 + } + }, + { + "ph": "s", "id": 187, "pid": 2338707, "tid": 2338707, "ts": 6267450173787.142, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450173805.123, "dur": 9.330, + "args": { + "External id": 454286,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450173809.222, "dur": 5.013, + "args": { + "External id": 454287,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450173885.811, "dur": 76.216, + "args": { + "External id": 454288,"Record function id": 0, "Ev Idx": 13524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450173963.518, "dur": 1182.516, + "args": { + "External id": 454289,"Record function id": 0, "Ev Idx": 13525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450174005.413, "dur": 1124.602, + "args": { + "External id": 454290,"Sequence number": 5036404, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13526 + } + }, + { + "ph": "s", "id": 186, "pid": 2338707, "tid": 2338707, "ts": 6267450174005.413, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450174112.494, "dur": 54.154, + "args": { + "External id": 454291,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450174184.158, "dur": 106.990, + "args": { + "External id": 454292,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450174303.918, "dur": 37.761, + "args": { + "External id": 454293,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450174350.129, "dur": 33.713, + "args": { + "External id": 454294,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450174414.412, "dur": 31.735, + "args": { + "External id": 454295,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450174463.615, "dur": 20.759, + "args": { + "External id": 454296,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450174504.835, "dur": 158.468, + "args": { + "External id": 454297,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450174558.140, "dur": 13.529, + "args": { + "External id": 454298,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450174564.161, "dur": 6.548, + "args": { + "External id": 454299,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450174574.607, "dur": 4.691, + "args": { + "External id": 454300,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450174580.649, "dur": 3.491, + "args": { + "External id": 454301,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450174586.599, "dur": 20.220, + "args": { + "External id": 454302,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450174675.001, "dur": 53.717, + "args": { + "External id": 454303,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450174763.363, "dur": 31.339, + "args": { + "External id": 454304,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450174806.498, "dur": 42.043, + "args": { + "External id": 454305,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450174855.485, "dur": 34.913, + "args": { + "External id": 454306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450174916.796, "dur": 27.201, + "args": { + "External id": 454307,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450174949.430, "dur": 35.556, + "args": { + "External id": 454308,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450175002.652, "dur": 20.484, + "args": { + "External id": 454309,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13545 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2338707, "tid": 2338707, + "ts": 6267450175214.637, "dur": 83.998, + "args": { + "External id": 454310,"Record function id": 0, "Ev Idx": 13546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267450175372.024, "dur": 45.368, + "args": { + "External id": 454311,"Record function id": 0, "Ev Idx": 13547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2338707, "tid": 2338707, + "ts": 6267450175427.092, "dur": 32891.377, + "args": { + "External id": 454312,"Record function id": 0, "Ev Idx": 13548 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338707, "tid": 2338707, + "ts": 6267450175435.007, "dur": 971.410, + "args": { + "External id": 454313,"Record function id": 0, "Ev Idx": 13549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450175522.561, "dur": 9.863, + "args": { + "External id": 454314,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450175545.578, "dur": 38.456, + "args": { + "External id": 454315,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175551.892, "dur": 2.446, + "args": { + "External id": 454316,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175558.465, "dur": 0.254, + "args": { + "External id": 454317,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175560.325, "dur": 0.592, + "args": { + "External id": 454318,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175562.393, "dur": 0.627, + "args": { + "External id": 454319,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175566.609, "dur": 0.492, + "args": { + "External id": 454320,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175568.485, "dur": 0.408, + "args": { + "External id": 454321,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175570.221, "dur": 3.056, + "args": { + "External id": 454322,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175574.852, "dur": 0.289, + "args": { + "External id": 454323,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175576.321, "dur": 0.308, + "args": { + "External id": 454324,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450175611.495, "dur": 58.497, + "args": { + "External id": 454325,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267450175708.334, "dur": 136.832, + "args": { + "External id": 454326,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450175720.823, "dur": 5.120, + "args": { + "External id": 454327,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267450175731.479, "dur": 11.987, + "args": { + "External id": 454328,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450175736.481, "dur": 6.600, + "args": { + "External id": 454329,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175740.477, "dur": 0.746, + "args": { + "External id": 454330,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450175751.215, "dur": 34.399, + "args": { + "External id": 454331,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175753.691, "dur": 2.302, + "args": { + "External id": 454332,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175757.862, "dur": 0.465, + "args": { + "External id": 454333,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175759.790, "dur": 0.423, + "args": { + "External id": 454334,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175763.881, "dur": 2.560, + "args": { + "External id": 454335,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175768.090, "dur": 0.284, + "args": { + "External id": 454336,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175769.764, "dur": 0.279, + "args": { + "External id": 454337,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175773.962, "dur": 0.437, + "args": { + "External id": 454338,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175775.987, "dur": 0.550, + "args": { + "External id": 454339,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450175778.008, "dur": 2.335, + "args": { + "External id": 454340,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450175798.994, "dur": 37.849, + "args": { + "External id": 454341,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267450175900.383, "dur": 396.260, + "args": { + "External id": 454342,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450175933.203, "dur": 357.796, + "args": { + "External id": 454343,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13579, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267450175944.551, "dur": 339.716, + "args": { + "External id": 454344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450176323.402, "dur": 2.236, + "args": { + "External id": 454345,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13581, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338707, "tid": 2338707, + "ts": 6267450176429.733, "dur": 31669.805, + "args": { + "External id": 454346,"Record function id": 0, "Ev Idx": 13582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450176538.392, "dur": 6.466, + "args": { + "External id": 454347,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450176562.153, "dur": 1.211, + "args": { + "External id": 454348,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450176565.070, "dur": 3.611, + "args": { + "External id": 454349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450176572.277, "dur": 1.123, + "args": { + "External id": 454350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450176574.861, "dur": 1.205, + "args": { + "External id": 454351,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450176577.455, "dur": 1.200, + "args": { + "External id": 454352,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450176580.555, "dur": 1.300, + "args": { + "External id": 454353,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450176585.550, "dur": 2.704, + "args": { + "External id": 454354,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450176590.002, "dur": 0.940, + "args": { + "External id": 454355,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450176609.655, "dur": 1.262, + "args": { + "External id": 454356,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450176634.183, "dur": 31397.859, + "args": { + "External id": 454357,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450176651.606, "dur": 31371.849, + "args": { + "External id": 454358,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450176668.089, "dur": 18.579, + "args": { + "External id": 454359,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450176692.551, "dur": 31292.528, + "args": { + "External id": 454360,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450176695.345, "dur": 31289.166, + "args": { + "External id": 454361,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450176701.444, "dur": 6.081, + "args": { + "External id": 454362,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450176709.222, "dur": 31272.238, + "args": { + "External id": 454363,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450208253.522, "dur": 35.630, + "args": { + "External id": 454364,"Sequence number": 5036405, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13600 + } + }, + { + "ph": "s", "id": 185, "pid": 2338707, "tid": 2338707, "ts": 6267450208253.522, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450208273.971, "dur": 9.713, + "args": { + "External id": 454365,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450208278.406, "dur": 4.963, + "args": { + "External id": 454366,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450208359.558, "dur": 73.697, + "args": { + "External id": 454367,"Record function id": 0, "Ev Idx": 13603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450208434.599, "dur": 1172.634, + "args": { + "External id": 454368,"Record function id": 0, "Ev Idx": 13604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450208473.213, "dur": 1105.017, + "args": { + "External id": 454369,"Sequence number": 5036406, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13605 + } + }, + { + "ph": "s", "id": 184, "pid": 2338707, "tid": 2338707, "ts": 6267450208473.213, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450208545.235, "dur": 65.994, + "args": { + "External id": 454370,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450208628.331, "dur": 110.182, + "args": { + "External id": 454371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450208751.406, "dur": 37.926, + "args": { + "External id": 454372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450208800.662, "dur": 29.549, + "args": { + "External id": 454373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450208856.971, "dur": 31.215, + "args": { + "External id": 454374,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450208911.099, "dur": 18.293, + "args": { + "External id": 454375,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450208953.798, "dur": 172.660, + "args": { + "External id": 454376,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450209004.894, "dur": 13.173, + "args": { + "External id": 454377,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450209011.265, "dur": 5.862, + "args": { + "External id": 454378,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450209021.187, "dur": 4.565, + "args": { + "External id": 454379,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450209027.292, "dur": 1.292, + "args": { + "External id": 454380,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450209031.534, "dur": 4.821, + "args": { + "External id": 454381,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450209139.608, "dur": 57.907, + "args": { + "External id": 454382,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450209232.508, "dur": 29.860, + "args": { + "External id": 454383,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450209273.287, "dur": 43.420, + "args": { + "External id": 454384,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450209328.275, "dur": 34.421, + "args": { + "External id": 454385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450209386.109, "dur": 28.866, + "args": { + "External id": 454386,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450209423.249, "dur": 36.549, + "args": { + "External id": 454387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450209480.789, "dur": 19.799, + "args": { + "External id": 454388,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13624 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2338707, "tid": 2338707, + "ts": 6267450209677.829, "dur": 86.264, + "args": { + "External id": 454389,"Record function id": 0, "Ev Idx": 13625 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267450209840.170, "dur": 47.900, + "args": { + "External id": 454390,"Record function id": 0, "Ev Idx": 13626 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2338707, "tid": 2338707, + "ts": 6267450209897.946, "dur": 32080.833, + "args": { + "External id": 454391,"Record function id": 0, "Ev Idx": 13627 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338707, "tid": 2338707, + "ts": 6267450209907.472, "dur": 998.627, + "args": { + "External id": 454392,"Record function id": 0, "Ev Idx": 13628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450209995.185, "dur": 9.486, + "args": { + "External id": 454393,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450210019.170, "dur": 79.809, + "args": { + "External id": 454394,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210025.344, "dur": 2.201, + "args": { + "External id": 454395,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210032.879, "dur": 0.385, + "args": { + "External id": 454396,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210034.821, "dur": 0.329, + "args": { + "External id": 454397,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210036.724, "dur": 0.441, + "args": { + "External id": 454398,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210040.767, "dur": 0.281, + "args": { + "External id": 454399,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210042.710, "dur": 0.402, + "args": { + "External id": 454400,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210044.602, "dur": 3.903, + "args": { + "External id": 454401,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210050.223, "dur": 0.486, + "args": { + "External id": 454402,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210052.342, "dur": 0.388, + "args": { + "External id": 454403,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450210114.431, "dur": 59.362, + "args": { + "External id": 454404,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267450210211.490, "dur": 143.067, + "args": { + "External id": 454405,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450210224.811, "dur": 5.889, + "args": { + "External id": 454406,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267450210238.547, "dur": 11.698, + "args": { + "External id": 454407,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450210243.892, "dur": 5.939, + "args": { + "External id": 454408,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210247.836, "dur": 0.583, + "args": { + "External id": 454409,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450210258.072, "dur": 37.435, + "args": { + "External id": 454410,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210260.734, "dur": 2.836, + "args": { + "External id": 454411,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210265.514, "dur": 0.535, + "args": { + "External id": 454412,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210271.044, "dur": 0.560, + "args": { + "External id": 454413,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210274.853, "dur": 2.611, + "args": { + "External id": 454414,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210278.909, "dur": 0.475, + "args": { + "External id": 454415,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210280.889, "dur": 1.107, + "args": { + "External id": 454416,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210283.294, "dur": 0.391, + "args": { + "External id": 454417,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210285.333, "dur": 0.361, + "args": { + "External id": 454418,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450210290.270, "dur": 0.338, + "args": { + "External id": 454419,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450210307.625, "dur": 38.416, + "args": { + "External id": 454420,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267450210412.316, "dur": 388.964, + "args": { + "External id": 454421,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450210446.034, "dur": 349.444, + "args": { + "External id": 454422,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13658, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267450210456.956, "dur": 331.179, + "args": { + "External id": 454423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450210827.876, "dur": 3.034, + "args": { + "External id": 454424,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13660, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338707, "tid": 2338707, + "ts": 6267450210927.996, "dur": 30837.199, + "args": { + "External id": 454425,"Record function id": 0, "Ev Idx": 13661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450211036.271, "dur": 6.745, + "args": { + "External id": 454426,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450211046.411, "dur": 1.062, + "args": { + "External id": 454427,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450211049.291, "dur": 3.499, + "args": { + "External id": 454428,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450211092.640, "dur": 1.480, + "args": { + "External id": 454429,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450211096.074, "dur": 1.019, + "args": { + "External id": 454430,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450211098.540, "dur": 1.230, + "args": { + "External id": 454431,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450211103.504, "dur": 1.046, + "args": { + "External id": 454432,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450211106.313, "dur": 2.279, + "args": { + "External id": 454433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450211110.380, "dur": 0.960, + "args": { + "External id": 454434,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450211112.672, "dur": 0.586, + "args": { + "External id": 454435,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450211135.758, "dur": 30586.157, + "args": { + "External id": 454436,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450211153.631, "dur": 30560.145, + "args": { + "External id": 454437,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450211177.347, "dur": 17.329, + "args": { + "External id": 454438,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450211198.333, "dur": 30480.267, + "args": { + "External id": 454439,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450211201.267, "dur": 30476.741, + "args": { + "External id": 454440,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450211207.173, "dur": 6.005, + "args": { + "External id": 454441,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450211215.009, "dur": 30459.770, + "args": { + "External id": 454442,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450241918.268, "dur": 36.462, + "args": { + "External id": 454443,"Sequence number": 5036407, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13679 + } + }, + { + "ph": "s", "id": 183, "pid": 2338707, "tid": 2338707, "ts": 6267450241918.268, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450241940.149, "dur": 9.028, + "args": { + "External id": 454444,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450241944.299, "dur": 4.673, + "args": { + "External id": 454445,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450242020.078, "dur": 106.417, + "args": { + "External id": 454446,"Record function id": 0, "Ev Idx": 13682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450242129.217, "dur": 1166.599, + "args": { + "External id": 454447,"Record function id": 0, "Ev Idx": 13683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450242174.302, "dur": 1106.915, + "args": { + "External id": 454448,"Sequence number": 5036408, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13684 + } + }, + { + "ph": "s", "id": 182, "pid": 2338707, "tid": 2338707, "ts": 6267450242174.302, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450242247.349, "dur": 53.573, + "args": { + "External id": 454449,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450242314.829, "dur": 111.262, + "args": { + "External id": 454450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450242441.689, "dur": 37.312, + "args": { + "External id": 454451,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450242487.230, "dur": 29.417, + "args": { + "External id": 454452,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450242544.727, "dur": 27.338, + "args": { + "External id": 454453,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450242591.123, "dur": 34.607, + "args": { + "External id": 454454,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450242651.637, "dur": 141.080, + "args": { + "External id": 454455,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450242705.002, "dur": 13.538, + "args": { + "External id": 454456,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450242711.257, "dur": 6.333, + "args": { + "External id": 454457,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450242721.379, "dur": 4.224, + "args": { + "External id": 454458,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450242726.969, "dur": 1.273, + "args": { + "External id": 454459,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450242733.336, "dur": 5.732, + "args": { + "External id": 454460,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450242803.192, "dur": 54.459, + "args": { + "External id": 454461,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450242891.206, "dur": 31.209, + "args": { + "External id": 454462,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450242934.155, "dur": 43.491, + "args": { + "External id": 454463,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450242985.399, "dur": 34.435, + "args": { + "External id": 454464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450243045.206, "dur": 66.419, + "args": { + "External id": 454465,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450243120.029, "dur": 43.127, + "args": { + "External id": 454466,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450243184.383, "dur": 19.131, + "args": { + "External id": 454467,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13703 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2338707, "tid": 2338707, + "ts": 6267450243365.286, "dur": 86.462, + "args": { + "External id": 454468,"Record function id": 0, "Ev Idx": 13704 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267450243531.206, "dur": 48.633, + "args": { + "External id": 454469,"Record function id": 0, "Ev Idx": 13705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2338707, "tid": 2338707, + "ts": 6267450243588.886, "dur": 37989.896, + "args": { + "External id": 454470,"Record function id": 0, "Ev Idx": 13706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338707, "tid": 2338707, + "ts": 6267450243616.073, "dur": 963.578, + "args": { + "External id": 454471,"Record function id": 0, "Ev Idx": 13707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450243709.328, "dur": 10.095, + "args": { + "External id": 454472,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450243733.855, "dur": 41.029, + "args": { + "External id": 454473,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243740.341, "dur": 2.411, + "args": { + "External id": 454474,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243747.765, "dur": 0.390, + "args": { + "External id": 454475,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243749.853, "dur": 0.742, + "args": { + "External id": 454476,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243752.282, "dur": 0.479, + "args": { + "External id": 454477,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243756.405, "dur": 0.503, + "args": { + "External id": 454478,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243758.256, "dur": 0.329, + "args": { + "External id": 454479,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243760.132, "dur": 4.547, + "args": { + "External id": 454480,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243766.305, "dur": 0.285, + "args": { + "External id": 454481,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243768.088, "dur": 0.302, + "args": { + "External id": 454482,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450243787.966, "dur": 54.285, + "args": { + "External id": 454483,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267450243877.749, "dur": 130.018, + "args": { + "External id": 454484,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450243888.647, "dur": 4.160, + "args": { + "External id": 454485,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267450243898.207, "dur": 11.504, + "args": { + "External id": 454486,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450243902.941, "dur": 6.361, + "args": { + "External id": 454487,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243907.236, "dur": 0.719, + "args": { + "External id": 454488,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450243917.386, "dur": 36.248, + "args": { + "External id": 454489,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243920.812, "dur": 2.746, + "args": { + "External id": 454490,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243924.961, "dur": 0.545, + "args": { + "External id": 454491,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243926.782, "dur": 0.452, + "args": { + "External id": 454492,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243931.147, "dur": 2.633, + "args": { + "External id": 454493,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243935.480, "dur": 0.617, + "args": { + "External id": 454494,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243937.651, "dur": 0.536, + "args": { + "External id": 454495,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243942.443, "dur": 0.393, + "args": { + "External id": 454496,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243944.282, "dur": 0.453, + "args": { + "External id": 454497,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450243946.287, "dur": 2.667, + "args": { + "External id": 454498,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450243967.744, "dur": 32.294, + "args": { + "External id": 454499,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267450244110.889, "dur": 366.497, + "args": { + "External id": 454500,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450244147.914, "dur": 324.326, + "args": { + "External id": 454501,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13737, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267450244159.954, "dur": 306.648, + "args": { + "External id": 454502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450244504.102, "dur": 2.247, + "args": { + "External id": 454503,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13739, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338707, "tid": 2338707, + "ts": 6267450244620.068, "dur": 36751.070, + "args": { + "External id": 454504,"Record function id": 0, "Ev Idx": 13740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450244735.893, "dur": 7.252, + "args": { + "External id": 454505,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450244746.759, "dur": 1.059, + "args": { + "External id": 454506,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450244749.297, "dur": 3.256, + "args": { + "External id": 454507,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450244754.236, "dur": 1.042, + "args": { + "External id": 454508,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450244756.766, "dur": 0.805, + "args": { + "External id": 454509,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450244758.876, "dur": 1.074, + "args": { + "External id": 454510,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450244761.776, "dur": 1.034, + "args": { + "External id": 454511,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450244764.277, "dur": 2.281, + "args": { + "External id": 454512,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450244770.209, "dur": 0.914, + "args": { + "External id": 454513,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450244773.057, "dur": 0.652, + "args": { + "External id": 454514,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450244793.156, "dur": 36532.183, + "args": { + "External id": 454515,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450244810.231, "dur": 36506.156, + "args": { + "External id": 454516,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450244830.969, "dur": 16.096, + "args": { + "External id": 454517,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450244850.651, "dur": 36429.434, + "args": { + "External id": 454518,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450244853.470, "dur": 36425.971, + "args": { + "External id": 454519,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450244860.597, "dur": 5.350, + "args": { + "External id": 454520,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450244869.761, "dur": 36406.410, + "args": { + "External id": 454521,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450281519.452, "dur": 33.015, + "args": { + "External id": 454522,"Sequence number": 5036409, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13758 + } + }, + { + "ph": "s", "id": 181, "pid": 2338707, "tid": 2338707, "ts": 6267450281519.452, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450281537.737, "dur": 9.443, + "args": { + "External id": 454523,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450281542.230, "dur": 4.768, + "args": { + "External id": 454524,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450281633.262, "dur": 76.068, + "args": { + "External id": 454525,"Record function id": 0, "Ev Idx": 13761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450281711.220, "dur": 1152.763, + "args": { + "External id": 454526,"Record function id": 0, "Ev Idx": 13762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450281753.102, "dur": 1097.056, + "args": { + "External id": 454527,"Sequence number": 5036410, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13763 + } + }, + { + "ph": "s", "id": 180, "pid": 2338707, "tid": 2338707, "ts": 6267450281753.102, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450281825.680, "dur": 54.840, + "args": { + "External id": 454528,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450281893.977, "dur": 108.201, + "args": { + "External id": 454529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450282014.634, "dur": 37.834, + "args": { + "External id": 454530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450282102.708, "dur": 35.668, + "args": { + "External id": 454531,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450282165.274, "dur": 26.926, + "args": { + "External id": 454532,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450282214.246, "dur": 16.993, + "args": { + "External id": 454533,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450282255.874, "dur": 133.478, + "args": { + "External id": 454534,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450282310.618, "dur": 12.373, + "args": { + "External id": 454535,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450282316.766, "dur": 5.454, + "args": { + "External id": 454536,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450282326.147, "dur": 3.885, + "args": { + "External id": 454537,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450282331.477, "dur": 1.308, + "args": { + "External id": 454538,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450282335.676, "dur": 4.953, + "args": { + "External id": 454539,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450282400.427, "dur": 50.510, + "args": { + "External id": 454540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450282483.388, "dur": 30.099, + "args": { + "External id": 454541,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450282524.416, "dur": 42.885, + "args": { + "External id": 454542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450282576.647, "dur": 52.753, + "args": { + "External id": 454543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450282655.470, "dur": 29.227, + "args": { + "External id": 454544,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450282692.905, "dur": 39.699, + "args": { + "External id": 454545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450282752.367, "dur": 17.260, + "args": { + "External id": 454546,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13782 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2338707, "tid": 2338707, + "ts": 6267450282930.837, "dur": 80.801, + "args": { + "External id": 454547,"Record function id": 0, "Ev Idx": 13783 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267450283131.035, "dur": 51.643, + "args": { + "External id": 454548,"Record function id": 0, "Ev Idx": 13784 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2338707, "tid": 2338707, + "ts": 6267450283192.983, "dur": 37291.052, + "args": { + "External id": 454549,"Record function id": 0, "Ev Idx": 13785 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338707, "tid": 2338707, + "ts": 6267450283201.577, "dur": 987.296, + "args": { + "External id": 454550,"Record function id": 0, "Ev Idx": 13786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450283289.843, "dur": 9.550, + "args": { + "External id": 454551,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450283313.616, "dur": 40.277, + "args": { + "External id": 454552,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283320.292, "dur": 2.343, + "args": { + "External id": 454553,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283327.475, "dur": 0.762, + "args": { + "External id": 454554,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283329.841, "dur": 0.416, + "args": { + "External id": 454555,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283332.233, "dur": 0.535, + "args": { + "External id": 454556,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283336.346, "dur": 0.385, + "args": { + "External id": 454557,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283338.163, "dur": 0.391, + "args": { + "External id": 454558,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283340.529, "dur": 2.933, + "args": { + "External id": 454559,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283345.290, "dur": 0.336, + "args": { + "External id": 454560,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283347.066, "dur": 0.430, + "args": { + "External id": 454561,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450283365.673, "dur": 56.587, + "args": { + "External id": 454562,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267450283456.082, "dur": 134.743, + "args": { + "External id": 454563,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450283467.696, "dur": 4.221, + "args": { + "External id": 454564,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267450283476.946, "dur": 11.461, + "args": { + "External id": 454565,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450283482.087, "dur": 5.856, + "args": { + "External id": 454566,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283486.220, "dur": 0.516, + "args": { + "External id": 454567,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450283495.393, "dur": 39.287, + "args": { + "External id": 454568,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283498.231, "dur": 2.650, + "args": { + "External id": 454569,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283502.732, "dur": 0.299, + "args": { + "External id": 454570,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283504.316, "dur": 0.523, + "args": { + "External id": 454571,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283509.344, "dur": 2.174, + "args": { + "External id": 454572,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283512.895, "dur": 0.557, + "args": { + "External id": 454573,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283515.321, "dur": 0.349, + "args": { + "External id": 454574,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283518.810, "dur": 0.473, + "args": { + "External id": 454575,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283521.012, "dur": 0.808, + "args": { + "External id": 454576,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450283527.867, "dur": 2.889, + "args": { + "External id": 454577,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450283548.006, "dur": 34.584, + "args": { + "External id": 454578,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267450283667.126, "dur": 375.133, + "args": { + "External id": 454579,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450283700.636, "dur": 336.087, + "args": { + "External id": 454580,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13816, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267450283711.660, "dur": 319.120, + "args": { + "External id": 454581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450284105.828, "dur": 3.515, + "args": { + "External id": 454582,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13818, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338707, "tid": 2338707, + "ts": 6267450284211.137, "dur": 36062.584, + "args": { + "External id": 454583,"Record function id": 0, "Ev Idx": 13819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450284320.521, "dur": 6.646, + "args": { + "External id": 454584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450284330.423, "dur": 1.245, + "args": { + "External id": 454585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450284333.264, "dur": 3.238, + "args": { + "External id": 454586,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450284338.168, "dur": 0.871, + "args": { + "External id": 454587,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450284340.301, "dur": 0.713, + "args": { + "External id": 454588,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450284342.123, "dur": 0.870, + "args": { + "External id": 454589,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450284344.817, "dur": 1.080, + "args": { + "External id": 454590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450284347.531, "dur": 2.139, + "args": { + "External id": 454591,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450284351.193, "dur": 1.063, + "args": { + "External id": 454592,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450284356.015, "dur": 0.895, + "args": { + "External id": 454593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450284375.473, "dur": 35850.376, + "args": { + "External id": 454594,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450284400.117, "dur": 35816.814, + "args": { + "External id": 454595,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450284416.510, "dur": 17.393, + "args": { + "External id": 454596,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450284437.446, "dur": 35740.591, + "args": { + "External id": 454597,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450284440.061, "dur": 35737.159, + "args": { + "External id": 454598,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450284446.376, "dur": 5.533, + "args": { + "External id": 454599,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450284453.750, "dur": 35719.988, + "args": { + "External id": 454600,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450320423.272, "dur": 34.836, + "args": { + "External id": 454601,"Sequence number": 5036411, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13837 + } + }, + { + "ph": "s", "id": 179, "pid": 2338707, "tid": 2338707, "ts": 6267450320423.272, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450320442.818, "dur": 9.961, + "args": { + "External id": 454602,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450320447.526, "dur": 5.080, + "args": { + "External id": 454603,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450320525.959, "dur": 88.224, + "args": { + "External id": 454604,"Record function id": 0, "Ev Idx": 13840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450320616.937, "dur": 1173.074, + "args": { + "External id": 454605,"Record function id": 0, "Ev Idx": 13841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450320656.687, "dur": 1119.526, + "args": { + "External id": 454606,"Sequence number": 5036412, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13842 + } + }, + { + "ph": "s", "id": 178, "pid": 2338707, "tid": 2338707, "ts": 6267450320656.687, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450320731.939, "dur": 53.545, + "args": { + "External id": 454607,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450320797.909, "dur": 107.866, + "args": { + "External id": 454608,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450320918.374, "dur": 38.319, + "args": { + "External id": 454609,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450320965.314, "dur": 30.450, + "args": { + "External id": 454610,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450321023.870, "dur": 26.826, + "args": { + "External id": 454611,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450321108.942, "dur": 20.758, + "args": { + "External id": 454612,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450321155.462, "dur": 136.508, + "args": { + "External id": 454613,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450321209.424, "dur": 12.927, + "args": { + "External id": 454614,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450321215.499, "dur": 6.079, + "args": { + "External id": 454615,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450321225.328, "dur": 4.814, + "args": { + "External id": 454616,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450321231.908, "dur": 1.223, + "args": { + "External id": 454617,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450321235.861, "dur": 4.760, + "args": { + "External id": 454618,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450321303.655, "dur": 57.602, + "args": { + "External id": 454619,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450321393.328, "dur": 29.497, + "args": { + "External id": 454620,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450321432.497, "dur": 44.104, + "args": { + "External id": 454621,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450321487.774, "dur": 36.189, + "args": { + "External id": 454622,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450321546.271, "dur": 39.960, + "args": { + "External id": 454623,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450321611.491, "dur": 42.876, + "args": { + "External id": 454624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450321677.731, "dur": 19.798, + "args": { + "External id": 454625,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13861 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2338707, "tid": 2338707, + "ts": 6267450321858.371, "dur": 82.208, + "args": { + "External id": 454626,"Record function id": 0, "Ev Idx": 13862 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267450322018.881, "dur": 89.843, + "args": { + "External id": 454627,"Record function id": 0, "Ev Idx": 13863 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2338707, "tid": 2338707, + "ts": 6267450322120.953, "dur": 35674.957, + "args": { + "External id": 454628,"Record function id": 0, "Ev Idx": 13864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338707, "tid": 2338707, + "ts": 6267450322130.572, "dur": 1003.960, + "args": { + "External id": 454629,"Record function id": 0, "Ev Idx": 13865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450322224.164, "dur": 11.195, + "args": { + "External id": 454630,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450322250.244, "dur": 41.590, + "args": { + "External id": 454631,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322256.637, "dur": 2.547, + "args": { + "External id": 454632,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322264.736, "dur": 0.440, + "args": { + "External id": 454633,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322267.180, "dur": 0.394, + "args": { + "External id": 454634,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322268.871, "dur": 0.767, + "args": { + "External id": 454635,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322273.111, "dur": 0.425, + "args": { + "External id": 454636,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322275.051, "dur": 0.607, + "args": { + "External id": 454637,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322277.194, "dur": 4.150, + "args": { + "External id": 454638,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322282.823, "dur": 0.441, + "args": { + "External id": 454639,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322284.938, "dur": 0.302, + "args": { + "External id": 454640,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450322304.075, "dur": 60.289, + "args": { + "External id": 454641,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267450322400.256, "dur": 164.009, + "args": { + "External id": 454642,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450322411.956, "dur": 4.221, + "args": { + "External id": 454643,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267450322421.719, "dur": 11.517, + "args": { + "External id": 454644,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450322426.591, "dur": 6.120, + "args": { + "External id": 454645,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322430.801, "dur": 0.674, + "args": { + "External id": 454646,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450322440.471, "dur": 34.274, + "args": { + "External id": 454647,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322443.119, "dur": 2.960, + "args": { + "External id": 454648,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322447.830, "dur": 0.441, + "args": { + "External id": 454649,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322449.698, "dur": 0.389, + "args": { + "External id": 454650,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322453.893, "dur": 1.904, + "args": { + "External id": 454651,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322457.563, "dur": 0.352, + "args": { + "External id": 454652,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322459.612, "dur": 0.429, + "args": { + "External id": 454653,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322463.909, "dur": 0.402, + "args": { + "External id": 454654,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322466.010, "dur": 0.300, + "args": { + "External id": 454655,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450322467.849, "dur": 2.337, + "args": { + "External id": 454656,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450322517.491, "dur": 37.527, + "args": { + "External id": 454657,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267450322640.982, "dur": 350.167, + "args": { + "External id": 454658,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450322674.492, "dur": 311.401, + "args": { + "External id": 454659,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13895, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267450322687.745, "dur": 292.109, + "args": { + "External id": 454660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450323017.645, "dur": 2.404, + "args": { + "External id": 454661,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13897, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338707, "tid": 2338707, + "ts": 6267450323158.729, "dur": 34427.328, + "args": { + "External id": 454662,"Record function id": 0, "Ev Idx": 13898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450323270.067, "dur": 7.048, + "args": { + "External id": 454663,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450323280.534, "dur": 1.106, + "args": { + "External id": 454664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450323283.360, "dur": 2.968, + "args": { + "External id": 454665,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450323288.291, "dur": 0.942, + "args": { + "External id": 454666,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450323290.470, "dur": 1.074, + "args": { + "External id": 454667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450323293.218, "dur": 1.261, + "args": { + "External id": 454668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450323296.449, "dur": 0.829, + "args": { + "External id": 454669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450323299.018, "dur": 2.080, + "args": { + "External id": 454670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450323305.842, "dur": 0.847, + "args": { + "External id": 454671,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450323310.623, "dur": 0.829, + "args": { + "External id": 454672,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450323330.702, "dur": 34213.013, + "args": { + "External id": 454673,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450323348.391, "dur": 34187.017, + "args": { + "External id": 454674,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450323374.020, "dur": 16.910, + "args": { + "External id": 454675,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450323394.574, "dur": 34101.864, + "args": { + "External id": 454676,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450323397.154, "dur": 34098.701, + "args": { + "External id": 454677,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450323403.534, "dur": 6.059, + "args": { + "External id": 454678,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450323411.293, "dur": 34081.372, + "args": { + "External id": 454679,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450357736.051, "dur": 34.099, + "args": { + "External id": 454680,"Sequence number": 5036413, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13916 + } + }, + { + "ph": "s", "id": 177, "pid": 2338707, "tid": 2338707, "ts": 6267450357736.051, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450357755.190, "dur": 9.665, + "args": { + "External id": 454681,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450357759.585, "dur": 4.989, + "args": { + "External id": 454682,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450357837.119, "dur": 74.369, + "args": { + "External id": 454683,"Record function id": 0, "Ev Idx": 13919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450357913.040, "dur": 1198.296, + "args": { + "External id": 454684,"Record function id": 0, "Ev Idx": 13920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450357952.873, "dur": 1142.968, + "args": { + "External id": 454685,"Sequence number": 5036414, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13921 + } + }, + { + "ph": "s", "id": 176, "pid": 2338707, "tid": 2338707, "ts": 6267450357952.873, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450358023.489, "dur": 83.722, + "args": { + "External id": 454686,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450358124.435, "dur": 105.696, + "args": { + "External id": 454687,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450358242.332, "dur": 41.850, + "args": { + "External id": 454688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450358293.453, "dur": 30.531, + "args": { + "External id": 454689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450358355.215, "dur": 28.277, + "args": { + "External id": 454690,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450358409.680, "dur": 16.129, + "args": { + "External id": 454691,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450358452.074, "dur": 138.267, + "args": { + "External id": 454692,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450358509.001, "dur": 12.526, + "args": { + "External id": 454693,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450358514.741, "dur": 5.944, + "args": { + "External id": 454694,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450358524.454, "dur": 4.572, + "args": { + "External id": 454695,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450358530.202, "dur": 1.127, + "args": { + "External id": 454696,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450358533.885, "dur": 4.624, + "args": { + "External id": 454697,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450358619.791, "dur": 57.736, + "args": { + "External id": 454698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450358713.108, "dur": 32.761, + "args": { + "External id": 454699,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450358756.927, "dur": 47.190, + "args": { + "External id": 454700,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450358814.961, "dur": 35.088, + "args": { + "External id": 454701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450358872.905, "dur": 26.046, + "args": { + "External id": 454702,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450358906.493, "dur": 36.398, + "args": { + "External id": 454703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450358963.793, "dur": 19.432, + "args": { + "External id": 454704,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13940 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2338707, "tid": 2338707, + "ts": 6267450359183.130, "dur": 86.380, + "args": { + "External id": 454705,"Record function id": 0, "Ev Idx": 13941 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267450359346.896, "dur": 49.144, + "args": { + "External id": 454706,"Record function id": 0, "Ev Idx": 13942 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2338707, "tid": 2338707, + "ts": 6267450359406.250, "dur": 35761.277, + "args": { + "External id": 454707,"Record function id": 0, "Ev Idx": 13943 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338707, "tid": 2338707, + "ts": 6267450359414.305, "dur": 1006.096, + "args": { + "External id": 454708,"Record function id": 0, "Ev Idx": 13944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450359502.882, "dur": 9.861, + "args": { + "External id": 454709,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450359526.721, "dur": 40.390, + "args": { + "External id": 454710,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359532.944, "dur": 2.434, + "args": { + "External id": 454711,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359540.274, "dur": 0.544, + "args": { + "External id": 454712,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359542.391, "dur": 0.469, + "args": { + "External id": 454713,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359544.641, "dur": 0.455, + "args": { + "External id": 454714,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359548.496, "dur": 0.794, + "args": { + "External id": 454715,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359550.770, "dur": 0.672, + "args": { + "External id": 454716,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359552.831, "dur": 4.381, + "args": { + "External id": 454717,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359558.647, "dur": 0.356, + "args": { + "External id": 454718,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359560.311, "dur": 0.506, + "args": { + "External id": 454719,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450359579.668, "dur": 74.447, + "args": { + "External id": 454720,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267450359695.031, "dur": 135.408, + "args": { + "External id": 454721,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450359707.714, "dur": 5.844, + "args": { + "External id": 454722,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267450359719.298, "dur": 11.521, + "args": { + "External id": 454723,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450359724.211, "dur": 6.202, + "args": { + "External id": 454724,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359728.319, "dur": 0.508, + "args": { + "External id": 454725,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450359737.950, "dur": 37.066, + "args": { + "External id": 454726,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359740.529, "dur": 6.188, + "args": { + "External id": 454727,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359748.865, "dur": 0.353, + "args": { + "External id": 454728,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359750.566, "dur": 0.727, + "args": { + "External id": 454729,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359754.953, "dur": 2.358, + "args": { + "External id": 454730,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359758.626, "dur": 0.530, + "args": { + "External id": 454731,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359760.777, "dur": 0.356, + "args": { + "External id": 454732,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359764.551, "dur": 0.298, + "args": { + "External id": 454733,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359766.347, "dur": 0.445, + "args": { + "External id": 454734,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450359768.767, "dur": 2.449, + "args": { + "External id": 454735,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450359788.314, "dur": 34.165, + "args": { + "External id": 454736,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267450359886.402, "dur": 426.956, + "args": { + "External id": 454737,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450359928.551, "dur": 379.098, + "args": { + "External id": 454738,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13974, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267450359940.030, "dur": 360.651, + "args": { + "External id": 454739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450360342.965, "dur": 2.380, + "args": { + "External id": 454740,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13976, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338707, "tid": 2338707, + "ts": 6267450360442.550, "dur": 34500.570, + "args": { + "External id": 454741,"Record function id": 0, "Ev Idx": 13977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450360550.353, "dur": 6.887, + "args": { + "External id": 454742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450360568.093, "dur": 0.964, + "args": { + "External id": 454743,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450360570.996, "dur": 3.246, + "args": { + "External id": 454744,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450360577.927, "dur": 0.872, + "args": { + "External id": 454745,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450360580.300, "dur": 1.187, + "args": { + "External id": 454746,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450360582.884, "dur": 0.789, + "args": { + "External id": 454747,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450360585.761, "dur": 0.728, + "args": { + "External id": 454748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450360590.404, "dur": 17.637, + "args": { + "External id": 454749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450360612.293, "dur": 1.238, + "args": { + "External id": 454750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450360615.632, "dur": 0.675, + "args": { + "External id": 454751,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450360639.091, "dur": 34259.880, + "args": { + "External id": 454752,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450360656.763, "dur": 34233.843, + "args": { + "External id": 454753,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450360674.512, "dur": 16.954, + "args": { + "External id": 454754,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450360697.602, "dur": 34153.286, + "args": { + "External id": 454755,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450360700.308, "dur": 34150.074, + "args": { + "External id": 454756,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450360706.504, "dur": 5.754, + "args": { + "External id": 454757,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450360714.080, "dur": 34133.164, + "args": { + "External id": 454758,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450395108.480, "dur": 32.575, + "args": { + "External id": 454759,"Sequence number": 5036415, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13995 + } + }, + { + "ph": "s", "id": 175, "pid": 2338707, "tid": 2338707, "ts": 6267450395108.480, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450395126.730, "dur": 9.327, + "args": { + "External id": 454760,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450395130.749, "dur": 4.985, + "args": { + "External id": 454761,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450395208.594, "dur": 79.897, + "args": { + "External id": 454762,"Record function id": 0, "Ev Idx": 13998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450395289.995, "dur": 1166.165, + "args": { + "External id": 454763,"Record function id": 0, "Ev Idx": 13999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450395329.793, "dur": 1112.403, + "args": { + "External id": 454764,"Sequence number": 5036416, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 14000 + } + }, + { + "ph": "s", "id": 174, "pid": 2338707, "tid": 2338707, "ts": 6267450395329.793, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450395399.105, "dur": 52.226, + "args": { + "External id": 454765,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450395464.863, "dur": 106.209, + "args": { + "External id": 454766,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450395583.250, "dur": 61.524, + "args": { + "External id": 454767,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450395657.827, "dur": 32.460, + "args": { + "External id": 454768,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450395717.615, "dur": 34.251, + "args": { + "External id": 454769,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450395772.985, "dur": 23.609, + "args": { + "External id": 454770,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450395820.411, "dur": 133.835, + "args": { + "External id": 454771,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450395871.769, "dur": 12.085, + "args": { + "External id": 454772,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450395877.607, "dur": 5.414, + "args": { + "External id": 454773,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450395886.995, "dur": 3.892, + "args": { + "External id": 454774,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450395892.399, "dur": 1.373, + "args": { + "External id": 454775,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450395896.606, "dur": 5.224, + "args": { + "External id": 454776,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450395965.106, "dur": 49.589, + "args": { + "External id": 454777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450396047.201, "dur": 69.506, + "args": { + "External id": 454778,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450396130.549, "dur": 48.320, + "args": { + "External id": 454779,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450396190.475, "dur": 36.101, + "args": { + "External id": 454780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450396260.068, "dur": 28.400, + "args": { + "External id": 454781,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 14017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450396296.512, "dur": 36.565, + "args": { + "External id": 454782,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 14018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450396353.059, "dur": 17.759, + "args": { + "External id": 454783,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 14019 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2338707, "tid": 2338707, + "ts": 6267450396524.155, "dur": 95.865, + "args": { + "External id": 454784,"Record function id": 0, "Ev Idx": 14020 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267450396704.003, "dur": 51.272, + "args": { + "External id": 454785,"Record function id": 0, "Ev Idx": 14021 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2338707, "tid": 2338707, + "ts": 6267450396765.130, "dur": 36084.083, + "args": { + "External id": 454786,"Record function id": 0, "Ev Idx": 14022 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2338707, "tid": 2338707, + "ts": 6267450396774.405, "dur": 991.817, + "args": { + "External id": 454787,"Record function id": 0, "Ev Idx": 14023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450396865.512, "dur": 10.081, + "args": { + "External id": 454788,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450396889.104, "dur": 41.128, + "args": { + "External id": 454789,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 14025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450396895.429, "dur": 2.481, + "args": { + "External id": 454790,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450396902.704, "dur": 0.326, + "args": { + "External id": 454791,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450396904.758, "dur": 0.521, + "args": { + "External id": 454792,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450396907.217, "dur": 0.504, + "args": { + "External id": 454793,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450396910.902, "dur": 0.555, + "args": { + "External id": 454794,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450396913.244, "dur": 0.491, + "args": { + "External id": 454795,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450396915.487, "dur": 4.768, + "args": { + "External id": 454796,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450396921.722, "dur": 0.432, + "args": { + "External id": 454797,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450396923.632, "dur": 0.267, + "args": { + "External id": 454798,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450396943.832, "dur": 53.759, + "args": { + "External id": 454799,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 14035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267450397031.930, "dur": 181.377, + "args": { + "External id": 454800,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 14036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450397043.955, "dur": 3.927, + "args": { + "External id": 454801,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267450397089.190, "dur": 13.079, + "args": { + "External id": 454802,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 14038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450397094.246, "dur": 7.598, + "args": { + "External id": 454803,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 14039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450397098.776, "dur": 0.951, + "args": { + "External id": 454804,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 14040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267450397111.627, "dur": 34.746, + "args": { + "External id": 454805,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 14041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450397114.578, "dur": 2.399, + "args": { + "External id": 454806,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450397118.456, "dur": 0.345, + "args": { + "External id": 454807,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450397120.355, "dur": 0.456, + "args": { + "External id": 454808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450397124.654, "dur": 2.211, + "args": { + "External id": 454809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450397128.835, "dur": 0.579, + "args": { + "External id": 454810,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450397130.992, "dur": 0.370, + "args": { + "External id": 454811,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450397135.745, "dur": 0.433, + "args": { + "External id": 454812,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450397137.990, "dur": 0.269, + "args": { + "External id": 454813,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450397140.163, "dur": 2.403, + "args": { + "External id": 454814,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450397162.036, "dur": 43.240, + "args": { + "External id": 454815,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 14051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267450397273.038, "dur": 388.374, + "args": { + "External id": 454816,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 14052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450397306.428, "dur": 349.325, + "args": { + "External id": 454817,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 14053, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267450397317.344, "dur": 331.868, + "args": { + "External id": 454818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 14054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267450397687.866, "dur": 2.307, + "args": { + "External id": 454819,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 14055, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2338707, "tid": 2338707, + "ts": 6267450397788.175, "dur": 34855.851, + "args": { + "External id": 454820,"Record function id": 0, "Ev Idx": 14056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450397895.446, "dur": 6.461, + "args": { + "External id": 454821,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 14057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450397905.633, "dur": 1.181, + "args": { + "External id": 454822,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450397908.508, "dur": 3.180, + "args": { + "External id": 454823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 14059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450397913.387, "dur": 0.663, + "args": { + "External id": 454824,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 14060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450397915.458, "dur": 1.213, + "args": { + "External id": 454825,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 14061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450397917.990, "dur": 0.937, + "args": { + "External id": 454826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 14062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450397923.063, "dur": 0.834, + "args": { + "External id": 454827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450397925.613, "dur": 2.337, + "args": { + "External id": 454828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450397929.471, "dur": 0.790, + "args": { + "External id": 454829,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450397931.795, "dur": 0.616, + "args": { + "External id": 454830,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450397953.153, "dur": 34632.568, + "args": { + "External id": 454831,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 14067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450397970.051, "dur": 34607.052, + "args": { + "External id": 454832,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 14068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450397995.282, "dur": 16.888, + "args": { + "External id": 454833,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450398016.372, "dur": 34525.173, + "args": { + "External id": 454834,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 14070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450398019.346, "dur": 34521.586, + "args": { + "External id": 454835,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 14071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450398025.380, "dur": 5.346, + "args": { + "External id": 454836,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450398032.684, "dur": 34505.099, + "args": { + "External id": 454837,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 14073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450432791.716, "dur": 31.177, + "args": { + "External id": 454838,"Sequence number": 5036417, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 14074 + } + }, + { + "ph": "s", "id": 173, "pid": 2338707, "tid": 2338707, "ts": 6267450432791.716, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450432808.620, "dur": 9.141, + "args": { + "External id": 454839,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 14075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450432812.641, "dur": 4.872, + "args": { + "External id": 454840,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450432891.513, "dur": 75.802, + "args": { + "External id": 454841,"Record function id": 0, "Ev Idx": 14077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267450432968.757, "dur": 1181.668, + "args": { + "External id": 454842,"Record function id": 0, "Ev Idx": 14078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450433010.988, "dur": 1124.534, + "args": { + "External id": 454843,"Sequence number": 5036418, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 14079 + } + }, + { + "ph": "s", "id": 172, "pid": 2338707, "tid": 2338707, "ts": 6267450433010.988, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450433109.076, "dur": 54.392, + "args": { + "External id": 454844,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450433180.054, "dur": 110.780, + "args": { + "External id": 454845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450433303.412, "dur": 42.950, + "args": { + "External id": 454846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450433356.344, "dur": 31.022, + "args": { + "External id": 454847,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450433413.792, "dur": 27.915, + "args": { + "External id": 454848,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267450433461.977, "dur": 19.758, + "args": { + "External id": 454849,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450433504.497, "dur": 149.422, + "args": { + "External id": 454850,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450433556.668, "dur": 12.717, + "args": { + "External id": 454851,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450433563.092, "dur": 5.407, + "args": { + "External id": 454852,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450433571.863, "dur": 3.717, + "args": { + "External id": 454853,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450433577.075, "dur": 1.229, + "args": { + "External id": 454854,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450433580.723, "dur": 4.112, + "args": { + "External id": 454855,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450433667.103, "dur": 55.816, + "args": { + "External id": 454856,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267450433757.267, "dur": 31.382, + "args": { + "External id": 454857,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450433799.544, "dur": 42.758, + "args": { + "External id": 454858,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450433853.431, "dur": 35.349, + "args": { + "External id": 454859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450433913.262, "dur": 28.112, + "args": { + "External id": 454860,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 14096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450433949.120, "dur": 37.836, + "args": { + "External id": 454861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 14097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267450434004.975, "dur": 18.831, + "args": { + "External id": 454862,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 14098 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2338707, "tid": 2338707, + "ts": 6267450434221.147, "dur": 34.415, + "args": { + "External id": 454863,"Record function id": 0, "Ev Idx": 14099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450434398.924, "dur": 320.835, + "args": { + "External id": 454864,"Sequence number": 5036419, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14100 + } + }, + { + "ph": "s", "id": 171, "pid": 2338707, "tid": 2338707, "ts": 6267450434398.924, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450434430.936, "dur": 8.466, + "args": { + "External id": 454865,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450434433.312, "dur": 5.852, + "args": { + "External id": 454866,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450434449.164, "dur": 13.892, + "args": { + "External id": 454867,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450434453.198, "dur": 9.267, + "args": { + "External id": 454868,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450434473.364, "dur": 5.383, + "args": { + "External id": 454869,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450434696.247, "dur": 7.996, + "args": { + "External id": 454870,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450434700.310, "dur": 3.528, + "args": { + "External id": 454871,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450434748.090, "dur": 147.881, + "args": { + "External id": 454872,"Sequence number": 5036420, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450434750.555, "dur": 15.424, + "args": { + "External id": 454873,"Sequence number": 5036420, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14109 + } + }, + { + "ph": "s", "id": 170, "pid": 2338707, "tid": 2338707, "ts": 6267450434750.555, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450434755.610, "dur": 8.483, + "args": { + "External id": 454874,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450434761.611, "dur": 2.167, + "args": { + "External id": 454875,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450434768.951, "dur": 126.686, + "args": { + "External id": 454876,"Sequence number": 5036421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450434772.186, "dur": 4.607, + "args": { + "External id": 454877,"Sequence number": 5036421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450434772.772, "dur": 3.866, + "args": { + "External id": 454878,"Sequence number": 5036421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14114 + } + }, + { + "ph": "s", "id": 169, "pid": 2338707, "tid": 2338707, "ts": 6267450434772.772, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450434780.712, "dur": 97.402, + "args": { + "External id": 454879,"Sequence number": 5036422, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14115 + } + }, + { + "ph": "s", "id": 168, "pid": 2338707, "tid": 2338707, "ts": 6267450434780.712, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450434885.340, "dur": 9.183, + "args": { + "External id": 454880,"Sequence number": 5036423, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14116 + } + }, + { + "ph": "s", "id": 167, "pid": 2338707, "tid": 2338707, "ts": 6267450434885.340, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450434907.186, "dur": 71.042, + "args": { + "External id": 454881,"Sequence number": 5036424, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450434908.205, "dur": 8.928, + "args": { + "External id": 454882,"Sequence number": 5036424, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14118 + } + }, + { + "ph": "s", "id": 166, "pid": 2338707, "tid": 2338707, "ts": 6267450434908.205, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450434911.095, "dur": 4.584, + "args": { + "External id": 454883,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450434914.472, "dur": 0.994, + "args": { + "External id": 454884,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450434917.747, "dur": 60.149, + "args": { + "External id": 454885,"Sequence number": 5036425, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450434918.662, "dur": 5.951, + "args": { + "External id": 454886,"Sequence number": 5036425, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450434919.646, "dur": 4.826, + "args": { + "External id": 454887,"Sequence number": 5036425, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14123 + } + }, + { + "ph": "s", "id": 165, "pid": 2338707, "tid": 2338707, "ts": 6267450434919.646, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450434925.395, "dur": 46.020, + "args": { + "External id": 454888,"Sequence number": 5036426, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14124 + } + }, + { + "ph": "s", "id": 164, "pid": 2338707, "tid": 2338707, "ts": 6267450434925.395, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450434973.298, "dur": 4.180, + "args": { + "External id": 454889,"Sequence number": 5036427, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14125 + } + }, + { + "ph": "s", "id": 163, "pid": 2338707, "tid": 2338707, "ts": 6267450434973.298, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450434987.381, "dur": 104.468, + "args": { + "External id": 454890,"Sequence number": 5036428, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450434988.150, "dur": 5.519, + "args": { + "External id": 454891,"Sequence number": 5036428, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14127 + } + }, + { + "ph": "s", "id": 162, "pid": 2338707, "tid": 2338707, "ts": 6267450434988.150, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450434989.919, "dur": 2.333, + "args": { + "External id": 454892,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450434991.540, "dur": 0.487, + "args": { + "External id": 454893,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450434996.860, "dur": 94.639, + "args": { + "External id": 454894,"Sequence number": 5036429, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450434998.099, "dur": 4.969, + "args": { + "External id": 454895,"Sequence number": 5036429, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450434999.265, "dur": 3.667, + "args": { + "External id": 454896,"Sequence number": 5036429, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14132 + } + }, + { + "ph": "s", "id": 161, "pid": 2338707, "tid": 2338707, "ts": 6267450434999.265, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450435003.677, "dur": 43.194, + "args": { + "External id": 454897,"Sequence number": 5036430, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14133 + } + }, + { + "ph": "s", "id": 160, "pid": 2338707, "tid": 2338707, "ts": 6267450435003.677, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450435049.265, "dur": 41.322, + "args": { + "External id": 454898,"Sequence number": 5036431, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14134 + } + }, + { + "ph": "s", "id": 159, "pid": 2338707, "tid": 2338707, "ts": 6267450435049.265, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450435117.933, "dur": 5.033, + "args": { + "External id": 454899,"Sequence number": 5036432, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450435119.693, "dur": 3.148, + "args": { + "External id": 454900,"Sequence number": 5036432, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14136 + } + }, + { + "ph": "s", "id": 158, "pid": 2338707, "tid": 2338707, "ts": 6267450435119.693, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450435130.930, "dur": 3.160, + "args": { + "External id": 454901,"Sequence number": 5036433, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450435132.217, "dur": 1.745, + "args": { + "External id": 454902,"Sequence number": 5036433, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14138 + } + }, + { + "ph": "s", "id": 157, "pid": 2338707, "tid": 2338707, "ts": 6267450435132.217, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450435140.226, "dur": 4.926, + "args": { + "External id": 454903,"Sequence number": 5036434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450435141.249, "dur": 3.763, + "args": { + "External id": 454904,"Sequence number": 5036434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14140 + } + }, + { + "ph": "s", "id": 156, "pid": 2338707, "tid": 2338707, "ts": 6267450435141.249, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450435185.330, "dur": 198.819, + "args": { + "External id": 454905,"Sequence number": 5036435, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14141 + } + }, + { + "ph": "s", "id": 155, "pid": 2338707, "tid": 2338707, "ts": 6267450435185.330, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450435210.376, "dur": 10.355, + "args": { + "External id": 454906,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450435213.749, "dur": 6.570, + "args": { + "External id": 454907,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450435397.897, "dur": 114.221, + "args": { + "External id": 454908,"Sequence number": 5036436, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14144 + } + }, + { + "ph": "s", "id": 154, "pid": 2338707, "tid": 2338707, "ts": 6267450435397.897, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450435412.209, "dur": 7.135, + "args": { + "External id": 454909,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450435414.694, "dur": 4.297, + "args": { + "External id": 454910,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338707, "tid": 2338707, + "ts": 6267450435542.260, "dur": 220.851, + "args": { + "External id": 454911,"Sequence number": 5036437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14147 + } + }, + { + "ph": "s", "id": 153, "pid": 2338707, "tid": 2338707, "ts": 6267450435542.260, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450435572.174, "dur": 159.464, + "args": { + "External id": 454912,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450435645.264, "dur": 9.131, + "args": { + "External id": 454913,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450435648.273, "dur": 5.518, + "args": { + "External id": 454914,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450435661.435, "dur": 4.074, + "args": { + "External id": 454915,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450435669.068, "dur": 1.093, + "args": { + "External id": 454916,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450435672.798, "dur": 3.239, + "args": { + "External id": 454917,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2338707, + "ts": 6267450435746.616, "dur": 5.968, + "args": { + "External id": 454918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450435768.860, "dur": 6.450, + "args": { + "External id": 454919,"Sequence number": 5036438, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450435770.569, "dur": 4.617, + "args": { + "External id": 454920,"Sequence number": 5036438, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14156 + } + }, + { + "ph": "s", "id": 152, "pid": 2338707, "tid": 2338707, "ts": 6267450435770.569, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450435791.735, "dur": 119.813, + "args": { + "External id": 454921,"Sequence number": 5036439, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450435793.251, "dur": 9.769, + "args": { + "External id": 454922,"Sequence number": 5036439, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14158 + } + }, + { + "ph": "s", "id": 151, "pid": 2338707, "tid": 2338707, "ts": 6267450435793.251, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450435797.032, "dur": 4.832, + "args": { + "External id": 454923,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450435799.684, "dur": 1.910, + "args": { + "External id": 454924,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450435804.449, "dur": 106.767, + "args": { + "External id": 454925,"Sequence number": 5036440, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450435808.648, "dur": 3.460, + "args": { + "External id": 454926,"Sequence number": 5036440, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450435809.523, "dur": 2.393, + "args": { + "External id": 454927,"Sequence number": 5036440, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14163 + } + }, + { + "ph": "s", "id": 150, "pid": 2338707, "tid": 2338707, "ts": 6267450435809.523, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450435813.118, "dur": 90.486, + "args": { + "External id": 454928,"Sequence number": 5036441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14164 + } + }, + { + "ph": "s", "id": 149, "pid": 2338707, "tid": 2338707, "ts": 6267450435813.118, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450435906.862, "dur": 3.552, + "args": { + "External id": 454929,"Sequence number": 5036442, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14165 + } + }, + { + "ph": "s", "id": 148, "pid": 2338707, "tid": 2338707, "ts": 6267450435906.862, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450435949.198, "dur": 287.320, + "args": { + "External id": 454930,"Sequence number": 5036443, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14166 + } + }, + { + "ph": "s", "id": 147, "pid": 2338707, "tid": 2338707, "ts": 6267450435949.198, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450435969.447, "dur": 2.876, + "args": { + "External id": 454931,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450435970.725, "dur": 1.466, + "args": { + "External id": 454932,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450435978.213, "dur": 7.791, + "args": { + "External id": 454933,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450435979.301, "dur": 6.601, + "args": { + "External id": 454934,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450435982.385, "dur": 3.435, + "args": { + "External id": 454935,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450435994.337, "dur": 6.657, + "args": { + "External id": 454936,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450435996.402, "dur": 4.333, + "args": { + "External id": 454937,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450436007.545, "dur": 2.913, + "args": { + "External id": 454938,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450436016.446, "dur": 3.456, + "args": { + "External id": 454939,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450436208.411, "dur": 4.660, + "args": { + "External id": 454940,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450436209.811, "dur": 2.856, + "args": { + "External id": 454941,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450436218.629, "dur": 2.140, + "args": { + "External id": 454942,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450436219.638, "dur": 1.026, + "args": { + "External id": 454943,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450436257.667, "dur": 118.396, + "args": { + "External id": 454944,"Sequence number": 5036444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450436258.732, "dur": 8.835, + "args": { + "External id": 454945,"Sequence number": 5036444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14181 + } + }, + { + "ph": "s", "id": 146, "pid": 2338707, "tid": 2338707, "ts": 6267450436258.732, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450436261.664, "dur": 4.589, + "args": { + "External id": 454946,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450436264.147, "dur": 1.873, + "args": { + "External id": 454947,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450436270.887, "dur": 104.911, + "args": { + "External id": 454948,"Sequence number": 5036445, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450436272.829, "dur": 3.382, + "args": { + "External id": 454949,"Sequence number": 5036445, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450436273.489, "dur": 2.620, + "args": { + "External id": 454950,"Sequence number": 5036445, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14186 + } + }, + { + "ph": "s", "id": 145, "pid": 2338707, "tid": 2338707, "ts": 6267450436273.489, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450436277.209, "dur": 84.304, + "args": { + "External id": 454951,"Sequence number": 5036446, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14187 + } + }, + { + "ph": "s", "id": 144, "pid": 2338707, "tid": 2338707, "ts": 6267450436277.209, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450436364.049, "dur": 11.160, + "args": { + "External id": 454952,"Sequence number": 5036447, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14188 + } + }, + { + "ph": "s", "id": 143, "pid": 2338707, "tid": 2338707, "ts": 6267450436364.049, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450436385.685, "dur": 71.626, + "args": { + "External id": 454953,"Sequence number": 5036448, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450436386.491, "dur": 7.165, + "args": { + "External id": 454954,"Sequence number": 5036448, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14190 + } + }, + { + "ph": "s", "id": 142, "pid": 2338707, "tid": 2338707, "ts": 6267450436386.491, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450436389.214, "dur": 2.964, + "args": { + "External id": 454955,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450436391.190, "dur": 0.859, + "args": { + "External id": 454956,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450436394.340, "dur": 62.728, + "args": { + "External id": 454957,"Sequence number": 5036449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450436397.623, "dur": 4.873, + "args": { + "External id": 454958,"Sequence number": 5036449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450436398.399, "dur": 3.955, + "args": { + "External id": 454959,"Sequence number": 5036449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14195 + } + }, + { + "ph": "s", "id": 141, "pid": 2338707, "tid": 2338707, "ts": 6267450436398.399, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450436402.930, "dur": 50.348, + "args": { + "External id": 454960,"Sequence number": 5036450, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14196 + } + }, + { + "ph": "s", "id": 140, "pid": 2338707, "tid": 2338707, "ts": 6267450436402.930, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450436455.145, "dur": 1.589, + "args": { + "External id": 454961,"Sequence number": 5036451, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14197 + } + }, + { + "ph": "s", "id": 139, "pid": 2338707, "tid": 2338707, "ts": 6267450436455.145, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450436482.633, "dur": 196.543, + "args": { + "External id": 454962,"Sequence number": 5036452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14198 + } + }, + { + "ph": "s", "id": 138, "pid": 2338707, "tid": 2338707, "ts": 6267450436482.633, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450436529.256, "dur": 4.446, + "args": { + "External id": 454963,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450436570.229, "dur": 91.993, + "args": { + "External id": 454964,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450436571.242, "dur": 7.729, + "args": { + "External id": 454965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450436572.888, "dur": 4.921, + "args": { + "External id": 454966,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450436576.860, "dur": 0.795, + "args": { + "External id": 454967,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450436582.302, "dur": 79.578, + "args": { + "External id": 454968,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450436583.864, "dur": 2.001, + "args": { + "External id": 454969,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450436584.558, "dur": 1.215, + "args": { + "External id": 454970,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450436586.507, "dur": 70.055, + "args": { + "External id": 454971,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450436659.898, "dur": 1.267, + "args": { + "External id": 454972,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267450436690.205, "dur": 28.722, + "args": { + "External id": 454973,"Sequence number": 5036453, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14209 + } + }, + { + "ph": "s", "id": 137, "pid": 2338707, "tid": 2338707, "ts": 6267450436690.205, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450436756.897, "dur": 195.722, + "args": { + "External id": 454974,"Sequence number": 5036454, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14210 + } + }, + { + "ph": "s", "id": 136, "pid": 2338707, "tid": 2338707, "ts": 6267450436756.897, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450436778.071, "dur": 3.693, + "args": { + "External id": 454975,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450436779.264, "dur": 2.227, + "args": { + "External id": 454976,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450436790.931, "dur": 7.829, + "args": { + "External id": 454977,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450436794.101, "dur": 4.247, + "args": { + "External id": 454978,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450436804.982, "dur": 3.782, + "args": { + "External id": 454979,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450436935.834, "dur": 2.794, + "args": { + "External id": 454980,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450436936.894, "dur": 1.574, + "args": { + "External id": 454981,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450436971.499, "dur": 137.111, + "args": { + "External id": 454982,"Sequence number": 5036455, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450436972.571, "dur": 8.076, + "args": { + "External id": 454983,"Sequence number": 5036455, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14219 + } + }, + { + "ph": "s", "id": 135, "pid": 2338707, "tid": 2338707, "ts": 6267450436972.571, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450436975.558, "dur": 3.572, + "args": { + "External id": 454984,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450436977.484, "dur": 1.298, + "args": { + "External id": 454985,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450436982.106, "dur": 126.155, + "args": { + "External id": 454986,"Sequence number": 5036456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450436985.638, "dur": 6.820, + "args": { + "External id": 454987,"Sequence number": 5036456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450436986.745, "dur": 5.555, + "args": { + "External id": 454988,"Sequence number": 5036456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14224 + } + }, + { + "ph": "s", "id": 134, "pid": 2338707, "tid": 2338707, "ts": 6267450436986.745, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450436993.314, "dur": 103.942, + "args": { + "External id": 454989,"Sequence number": 5036457, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14225 + } + }, + { + "ph": "s", "id": 133, "pid": 2338707, "tid": 2338707, "ts": 6267450436993.314, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450437101.236, "dur": 6.248, + "args": { + "External id": 454990,"Sequence number": 5036458, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14226 + } + }, + { + "ph": "s", "id": 132, "pid": 2338707, "tid": 2338707, "ts": 6267450437101.236, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450437120.491, "dur": 81.333, + "args": { + "External id": 454991,"Sequence number": 5036459, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450437123.637, "dur": 9.436, + "args": { + "External id": 454992,"Sequence number": 5036459, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14228 + } + }, + { + "ph": "s", "id": 131, "pid": 2338707, "tid": 2338707, "ts": 6267450437123.637, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450437128.481, "dur": 3.112, + "args": { + "External id": 454993,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450437130.375, "dur": 0.996, + "args": { + "External id": 454994,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450437134.151, "dur": 67.421, + "args": { + "External id": 454995,"Sequence number": 5036460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450437135.471, "dur": 5.435, + "args": { + "External id": 454996,"Sequence number": 5036460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450437138.221, "dur": 2.543, + "args": { + "External id": 454997,"Sequence number": 5036460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14233 + } + }, + { + "ph": "s", "id": 130, "pid": 2338707, "tid": 2338707, "ts": 6267450437138.221, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450437142.159, "dur": 51.908, + "args": { + "External id": 454998,"Sequence number": 5036461, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14234 + } + }, + { + "ph": "s", "id": 129, "pid": 2338707, "tid": 2338707, "ts": 6267450437142.159, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450437196.185, "dur": 4.656, + "args": { + "External id": 454999,"Sequence number": 5036462, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14235 + } + }, + { + "ph": "s", "id": 128, "pid": 2338707, "tid": 2338707, "ts": 6267450437196.185, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450437209.663, "dur": 61.363, + "args": { + "External id": 455000,"Sequence number": 5036463, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450437210.369, "dur": 8.344, + "args": { + "External id": 455001,"Sequence number": 5036463, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14237 + } + }, + { + "ph": "s", "id": 127, "pid": 2338707, "tid": 2338707, "ts": 6267450437210.369, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450437214.925, "dur": 2.452, + "args": { + "External id": 455002,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450437216.542, "dur": 0.712, + "args": { + "External id": 455003,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450437219.307, "dur": 51.481, + "args": { + "External id": 455004,"Sequence number": 5036464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450437220.249, "dur": 8.026, + "args": { + "External id": 455005,"Sequence number": 5036464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450437221.372, "dur": 6.784, + "args": { + "External id": 455006,"Sequence number": 5036464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14242 + } + }, + { + "ph": "s", "id": 126, "pid": 2338707, "tid": 2338707, "ts": 6267450437221.372, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450437228.873, "dur": 38.159, + "args": { + "External id": 455007,"Sequence number": 5036465, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14243 + } + }, + { + "ph": "s", "id": 125, "pid": 2338707, "tid": 2338707, "ts": 6267450437228.873, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450437268.817, "dur": 1.664, + "args": { + "External id": 455008,"Sequence number": 5036466, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14244 + } + }, + { + "ph": "s", "id": 124, "pid": 2338707, "tid": 2338707, "ts": 6267450437268.817, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450437289.776, "dur": 4.324, + "args": { + "External id": 455009,"Sequence number": 5036467, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450437291.579, "dur": 2.363, + "args": { + "External id": 455010,"Sequence number": 5036467, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14246 + } + }, + { + "ph": "s", "id": 123, "pid": 2338707, "tid": 2338707, "ts": 6267450437291.579, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450437302.883, "dur": 5.395, + "args": { + "External id": 455011,"Sequence number": 5036468, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450437303.993, "dur": 4.137, + "args": { + "External id": 455012,"Sequence number": 5036468, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14248 + } + }, + { + "ph": "s", "id": 122, "pid": 2338707, "tid": 2338707, "ts": 6267450437303.993, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450437312.377, "dur": 2.868, + "args": { + "External id": 455013,"Sequence number": 5036469, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450437313.465, "dur": 1.656, + "args": { + "External id": 455014,"Sequence number": 5036469, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14250 + } + }, + { + "ph": "s", "id": 121, "pid": 2338707, "tid": 2338707, "ts": 6267450437313.465, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450437344.105, "dur": 159.692, + "args": { + "External id": 455015,"Sequence number": 5036470, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14251 + } + }, + { + "ph": "s", "id": 120, "pid": 2338707, "tid": 2338707, "ts": 6267450437344.105, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450437365.399, "dur": 8.681, + "args": { + "External id": 455016,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450437368.579, "dur": 5.150, + "args": { + "External id": 455017,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450437516.486, "dur": 127.021, + "args": { + "External id": 455018,"Sequence number": 5036471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14254 + } + }, + { + "ph": "s", "id": 119, "pid": 2338707, "tid": 2338707, "ts": 6267450437516.486, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450437530.189, "dur": 6.321, + "args": { + "External id": 455019,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450437532.496, "dur": 3.666, + "args": { + "External id": 455020,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338707, "tid": 2338707, + "ts": 6267450437676.880, "dur": 182.583, + "args": { + "External id": 455021,"Sequence number": 5036472, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14257 + } + }, + { + "ph": "s", "id": 118, "pid": 2338707, "tid": 2338707, "ts": 6267450437676.880, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450437707.435, "dur": 124.433, + "args": { + "External id": 455022,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450437754.092, "dur": 9.567, + "args": { + "External id": 455023,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450437756.863, "dur": 6.263, + "args": { + "External id": 455024,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450437766.430, "dur": 4.213, + "args": { + "External id": 455025,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450437772.316, "dur": 1.211, + "args": { + "External id": 455026,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450437776.132, "dur": 3.561, + "args": { + "External id": 455027,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2338707, + "ts": 6267450437843.486, "dur": 5.290, + "args": { + "External id": 455028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450437865.617, "dur": 5.776, + "args": { + "External id": 455029,"Sequence number": 5036473, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450437867.185, "dur": 4.022, + "args": { + "External id": 455030,"Sequence number": 5036473, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14266 + } + }, + { + "ph": "s", "id": 117, "pid": 2338707, "tid": 2338707, "ts": 6267450437867.185, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450437883.607, "dur": 116.624, + "args": { + "External id": 455031,"Sequence number": 5036474, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450437885.032, "dur": 9.001, + "args": { + "External id": 455032,"Sequence number": 5036474, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14268 + } + }, + { + "ph": "s", "id": 116, "pid": 2338707, "tid": 2338707, "ts": 6267450437885.032, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450437888.204, "dur": 4.583, + "args": { + "External id": 455033,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450437890.784, "dur": 1.773, + "args": { + "External id": 455034,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450437897.721, "dur": 102.207, + "args": { + "External id": 455035,"Sequence number": 5036475, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450437899.894, "dur": 3.087, + "args": { + "External id": 455036,"Sequence number": 5036475, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450437901.073, "dur": 1.772, + "args": { + "External id": 455037,"Sequence number": 5036475, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14273 + } + }, + { + "ph": "s", "id": 115, "pid": 2338707, "tid": 2338707, "ts": 6267450437901.073, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450437904.135, "dur": 84.728, + "args": { + "External id": 455038,"Sequence number": 5036476, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14274 + } + }, + { + "ph": "s", "id": 114, "pid": 2338707, "tid": 2338707, "ts": 6267450437904.135, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450437991.990, "dur": 7.306, + "args": { + "External id": 455039,"Sequence number": 5036477, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14275 + } + }, + { + "ph": "s", "id": 113, "pid": 2338707, "tid": 2338707, "ts": 6267450437991.990, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450438045.996, "dur": 283.799, + "args": { + "External id": 455040,"Sequence number": 5036478, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14276 + } + }, + { + "ph": "s", "id": 112, "pid": 2338707, "tid": 2338707, "ts": 6267450438045.996, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450438104.316, "dur": 6.540, + "args": { + "External id": 455041,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450438105.829, "dur": 4.682, + "args": { + "External id": 455042,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450438115.723, "dur": 5.833, + "args": { + "External id": 455043,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450438119.619, "dur": 1.830, + "args": { + "External id": 455044,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450438120.448, "dur": 0.904, + "args": { + "External id": 455045,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450438130.003, "dur": 8.485, + "args": { + "External id": 455046,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450438132.403, "dur": 5.759, + "args": { + "External id": 455047,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450438145.072, "dur": 3.346, + "args": { + "External id": 455048,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450438152.001, "dur": 4.619, + "args": { + "External id": 455049,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450438304.944, "dur": 3.728, + "args": { + "External id": 455050,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450438306.195, "dur": 2.105, + "args": { + "External id": 455051,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450438311.621, "dur": 2.018, + "args": { + "External id": 455052,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450438312.674, "dur": 0.869, + "args": { + "External id": 455053,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450438349.502, "dur": 113.574, + "args": { + "External id": 455054,"Sequence number": 5036479, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450438350.475, "dur": 11.431, + "args": { + "External id": 455055,"Sequence number": 5036479, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14291 + } + }, + { + "ph": "s", "id": 111, "pid": 2338707, "tid": 2338707, "ts": 6267450438350.475, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450438353.835, "dur": 6.784, + "args": { + "External id": 455056,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450438358.519, "dur": 1.869, + "args": { + "External id": 455057,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450438363.227, "dur": 99.576, + "args": { + "External id": 455058,"Sequence number": 5036480, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450438365.050, "dur": 6.328, + "args": { + "External id": 455059,"Sequence number": 5036480, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450438366.299, "dur": 4.894, + "args": { + "External id": 455060,"Sequence number": 5036480, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14296 + } + }, + { + "ph": "s", "id": 110, "pid": 2338707, "tid": 2338707, "ts": 6267450438366.299, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450438374.439, "dur": 81.773, + "args": { + "External id": 455061,"Sequence number": 5036481, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14297 + } + }, + { + "ph": "s", "id": 109, "pid": 2338707, "tid": 2338707, "ts": 6267450438374.439, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450438458.978, "dur": 3.225, + "args": { + "External id": 455062,"Sequence number": 5036482, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14298 + } + }, + { + "ph": "s", "id": 108, "pid": 2338707, "tid": 2338707, "ts": 6267450438458.978, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450438472.401, "dur": 83.551, + "args": { + "External id": 455063,"Sequence number": 5036483, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450438473.392, "dur": 15.303, + "args": { + "External id": 455064,"Sequence number": 5036483, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14300 + } + }, + { + "ph": "s", "id": 107, "pid": 2338707, "tid": 2338707, "ts": 6267450438473.392, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450438481.989, "dur": 5.279, + "args": { + "External id": 455065,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450438486.247, "dur": 0.872, + "args": { + "External id": 455066,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450438489.509, "dur": 66.152, + "args": { + "External id": 455067,"Sequence number": 5036484, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450438490.939, "dur": 6.514, + "args": { + "External id": 455068,"Sequence number": 5036484, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450438491.628, "dur": 5.684, + "args": { + "External id": 455069,"Sequence number": 5036484, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14305 + } + }, + { + "ph": "s", "id": 106, "pid": 2338707, "tid": 2338707, "ts": 6267450438491.628, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450438498.422, "dur": 48.907, + "args": { + "External id": 455070,"Sequence number": 5036485, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14306 + } + }, + { + "ph": "s", "id": 105, "pid": 2338707, "tid": 2338707, "ts": 6267450438498.422, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450438549.451, "dur": 5.828, + "args": { + "External id": 455071,"Sequence number": 5036486, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14307 + } + }, + { + "ph": "s", "id": 104, "pid": 2338707, "tid": 2338707, "ts": 6267450438549.451, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450438578.519, "dur": 181.964, + "args": { + "External id": 455072,"Sequence number": 5036487, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14308 + } + }, + { + "ph": "s", "id": 103, "pid": 2338707, "tid": 2338707, "ts": 6267450438578.519, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450438637.083, "dur": 5.135, + "args": { + "External id": 455073,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450438681.882, "dur": 63.540, + "args": { + "External id": 455074,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450438682.840, "dur": 6.399, + "args": { + "External id": 455075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450438684.506, "dur": 3.397, + "args": { + "External id": 455076,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450438686.476, "dur": 1.259, + "args": { + "External id": 455077,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450438690.546, "dur": 54.466, + "args": { + "External id": 455078,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450438691.941, "dur": 2.363, + "args": { + "External id": 455079,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450438692.675, "dur": 1.528, + "args": { + "External id": 455080,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450438694.756, "dur": 46.678, + "args": { + "External id": 455081,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450438743.379, "dur": 1.016, + "args": { + "External id": 455082,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267450438769.861, "dur": 27.334, + "args": { + "External id": 455083,"Sequence number": 5036488, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14319 + } + }, + { + "ph": "s", "id": 102, "pid": 2338707, "tid": 2338707, "ts": 6267450438769.861, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450438838.464, "dur": 195.511, + "args": { + "External id": 455084,"Sequence number": 5036489, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14320 + } + }, + { + "ph": "s", "id": 101, "pid": 2338707, "tid": 2338707, "ts": 6267450438838.464, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450438858.690, "dur": 2.911, + "args": { + "External id": 455085,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450438859.700, "dur": 1.732, + "args": { + "External id": 455086,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450438870.216, "dur": 8.077, + "args": { + "External id": 455087,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450438873.892, "dur": 4.034, + "args": { + "External id": 455088,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450438884.824, "dur": 5.717, + "args": { + "External id": 455089,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450439019.482, "dur": 3.377, + "args": { + "External id": 455090,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450439020.753, "dur": 1.891, + "args": { + "External id": 455091,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450439052.608, "dur": 142.935, + "args": { + "External id": 455092,"Sequence number": 5036490, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450439053.651, "dur": 47.046, + "args": { + "External id": 455093,"Sequence number": 5036490, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14329 + } + }, + { + "ph": "s", "id": 100, "pid": 2338707, "tid": 2338707, "ts": 6267450439053.651, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450439094.206, "dur": 4.253, + "args": { + "External id": 455094,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450439096.580, "dur": 1.545, + "args": { + "External id": 455095,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450439103.964, "dur": 91.335, + "args": { + "External id": 455096,"Sequence number": 5036491, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450439105.731, "dur": 4.957, + "args": { + "External id": 455097,"Sequence number": 5036491, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450439107.025, "dur": 3.543, + "args": { + "External id": 455098,"Sequence number": 5036491, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14334 + } + }, + { + "ph": "s", "id": 99, "pid": 2338707, "tid": 2338707, "ts": 6267450439107.025, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450439111.631, "dur": 76.073, + "args": { + "External id": 455099,"Sequence number": 5036492, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14335 + } + }, + { + "ph": "s", "id": 98, "pid": 2338707, "tid": 2338707, "ts": 6267450439111.631, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450439189.904, "dur": 4.631, + "args": { + "External id": 455100,"Sequence number": 5036493, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14336 + } + }, + { + "ph": "s", "id": 97, "pid": 2338707, "tid": 2338707, "ts": 6267450439189.904, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450439207.295, "dur": 64.016, + "args": { + "External id": 455101,"Sequence number": 5036494, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450439208.245, "dur": 6.052, + "args": { + "External id": 455102,"Sequence number": 5036494, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14338 + } + }, + { + "ph": "s", "id": 96, "pid": 2338707, "tid": 2338707, "ts": 6267450439208.245, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450439210.088, "dur": 3.034, + "args": { + "External id": 455103,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450439212.021, "dur": 0.976, + "args": { + "External id": 455104,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450439215.008, "dur": 56.077, + "args": { + "External id": 455105,"Sequence number": 5036495, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450439218.676, "dur": 3.575, + "args": { + "External id": 455106,"Sequence number": 5036495, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450439219.589, "dur": 2.541, + "args": { + "External id": 455107,"Sequence number": 5036495, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14343 + } + }, + { + "ph": "s", "id": 95, "pid": 2338707, "tid": 2338707, "ts": 6267450439219.589, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450439222.940, "dur": 43.931, + "args": { + "External id": 455108,"Sequence number": 5036496, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14344 + } + }, + { + "ph": "s", "id": 94, "pid": 2338707, "tid": 2338707, "ts": 6267450439222.940, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450439268.824, "dur": 1.901, + "args": { + "External id": 455109,"Sequence number": 5036497, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14345 + } + }, + { + "ph": "s", "id": 93, "pid": 2338707, "tid": 2338707, "ts": 6267450439268.824, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450439284.258, "dur": 63.097, + "args": { + "External id": 455110,"Sequence number": 5036498, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450439287.331, "dur": 7.885, + "args": { + "External id": 455111,"Sequence number": 5036498, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14347 + } + }, + { + "ph": "s", "id": 92, "pid": 2338707, "tid": 2338707, "ts": 6267450439287.331, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450439289.499, "dur": 4.396, + "args": { + "External id": 455112,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450439291.059, "dur": 2.611, + "args": { + "External id": 455113,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450439296.224, "dur": 50.833, + "args": { + "External id": 455114,"Sequence number": 5036499, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450439297.281, "dur": 6.047, + "args": { + "External id": 455115,"Sequence number": 5036499, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450439300.416, "dur": 2.804, + "args": { + "External id": 455116,"Sequence number": 5036499, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14352 + } + }, + { + "ph": "s", "id": 91, "pid": 2338707, "tid": 2338707, "ts": 6267450439300.416, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450439303.857, "dur": 35.954, + "args": { + "External id": 455117,"Sequence number": 5036500, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14353 + } + }, + { + "ph": "s", "id": 90, "pid": 2338707, "tid": 2338707, "ts": 6267450439303.857, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450439341.656, "dur": 5.042, + "args": { + "External id": 455118,"Sequence number": 5036501, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14354 + } + }, + { + "ph": "s", "id": 89, "pid": 2338707, "tid": 2338707, "ts": 6267450439341.656, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450439365.205, "dur": 5.346, + "args": { + "External id": 455119,"Sequence number": 5036502, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450439367.856, "dur": 2.521, + "args": { + "External id": 455120,"Sequence number": 5036502, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14356 + } + }, + { + "ph": "s", "id": 88, "pid": 2338707, "tid": 2338707, "ts": 6267450439367.856, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450439377.864, "dur": 3.108, + "args": { + "External id": 455121,"Sequence number": 5036503, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450439378.784, "dur": 2.076, + "args": { + "External id": 455122,"Sequence number": 5036503, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14358 + } + }, + { + "ph": "s", "id": 87, "pid": 2338707, "tid": 2338707, "ts": 6267450439378.784, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450439384.820, "dur": 5.566, + "args": { + "External id": 455123,"Sequence number": 5036504, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450439386.079, "dur": 4.197, + "args": { + "External id": 455124,"Sequence number": 5036504, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14360 + } + }, + { + "ph": "s", "id": 86, "pid": 2338707, "tid": 2338707, "ts": 6267450439386.079, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450439422.697, "dur": 160.831, + "args": { + "External id": 455125,"Sequence number": 5036505, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14361 + } + }, + { + "ph": "s", "id": 85, "pid": 2338707, "tid": 2338707, "ts": 6267450439422.697, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450439444.625, "dur": 11.048, + "args": { + "External id": 455126,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450439448.014, "dur": 7.244, + "args": { + "External id": 455127,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450439613.655, "dur": 121.137, + "args": { + "External id": 455128,"Sequence number": 5036506, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14364 + } + }, + { + "ph": "s", "id": 84, "pid": 2338707, "tid": 2338707, "ts": 6267450439613.655, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450439630.159, "dur": 8.281, + "args": { + "External id": 455129,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450439633.332, "dur": 4.603, + "args": { + "External id": 455130,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338707, "tid": 2338707, + "ts": 6267450439764.854, "dur": 177.856, + "args": { + "External id": 455131,"Sequence number": 5036507, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14367 + } + }, + { + "ph": "s", "id": 83, "pid": 2338707, "tid": 2338707, "ts": 6267450439764.854, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450439790.699, "dur": 126.727, + "args": { + "External id": 455132,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450439839.526, "dur": 7.140, + "args": { + "External id": 455133,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450439842.131, "dur": 3.930, + "args": { + "External id": 455134,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450439849.612, "dur": 5.637, + "args": { + "External id": 455135,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450439856.647, "dur": 1.134, + "args": { + "External id": 455136,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450439862.282, "dur": 2.981, + "args": { + "External id": 455137,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2338707, + "ts": 6267450439929.224, "dur": 4.903, + "args": { + "External id": 455138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450439948.140, "dur": 6.548, + "args": { + "External id": 455139,"Sequence number": 5036508, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450439950.373, "dur": 4.120, + "args": { + "External id": 455140,"Sequence number": 5036508, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14376 + } + }, + { + "ph": "s", "id": 82, "pid": 2338707, "tid": 2338707, "ts": 6267450439950.373, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450439966.286, "dur": 156.844, + "args": { + "External id": 455141,"Sequence number": 5036509, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450439967.704, "dur": 11.632, + "args": { + "External id": 455142,"Sequence number": 5036509, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14378 + } + }, + { + "ph": "s", "id": 81, "pid": 2338707, "tid": 2338707, "ts": 6267450439967.704, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450439972.421, "dur": 5.755, + "args": { + "External id": 455143,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450439976.244, "dur": 1.710, + "args": { + "External id": 455144,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450439980.759, "dur": 141.921, + "args": { + "External id": 455145,"Sequence number": 5036510, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450439982.758, "dur": 4.728, + "args": { + "External id": 455146,"Sequence number": 5036510, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450439983.597, "dur": 3.778, + "args": { + "External id": 455147,"Sequence number": 5036510, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14383 + } + }, + { + "ph": "s", "id": 80, "pid": 2338707, "tid": 2338707, "ts": 6267450439983.597, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450439988.640, "dur": 122.935, + "args": { + "External id": 455148,"Sequence number": 5036511, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14384 + } + }, + { + "ph": "s", "id": 79, "pid": 2338707, "tid": 2338707, "ts": 6267450439988.640, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450440115.487, "dur": 6.197, + "args": { + "External id": 455149,"Sequence number": 5036512, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14385 + } + }, + { + "ph": "s", "id": 78, "pid": 2338707, "tid": 2338707, "ts": 6267450440115.487, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450440163.064, "dur": 231.927, + "args": { + "External id": 455150,"Sequence number": 5036513, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14386 + } + }, + { + "ph": "s", "id": 77, "pid": 2338707, "tid": 2338707, "ts": 6267450440163.064, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450440182.473, "dur": 2.992, + "args": { + "External id": 455151,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450440183.665, "dur": 1.578, + "args": { + "External id": 455152,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450440189.494, "dur": 5.774, + "args": { + "External id": 455153,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450440193.126, "dur": 2.039, + "args": { + "External id": 455154,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450440194.069, "dur": 0.999, + "args": { + "External id": 455155,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450440203.135, "dur": 18.802, + "args": { + "External id": 455156,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450440214.076, "dur": 7.484, + "args": { + "External id": 455157,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450440228.347, "dur": 4.432, + "args": { + "External id": 455158,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450440238.306, "dur": 5.205, + "args": { + "External id": 455159,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450440374.874, "dur": 3.253, + "args": { + "External id": 455160,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450440375.828, "dur": 1.852, + "args": { + "External id": 455161,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450440380.721, "dur": 2.231, + "args": { + "External id": 455162,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450440381.776, "dur": 1.065, + "args": { + "External id": 455163,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450440414.330, "dur": 138.491, + "args": { + "External id": 455164,"Sequence number": 5036514, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450440415.513, "dur": 13.184, + "args": { + "External id": 455165,"Sequence number": 5036514, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14401 + } + }, + { + "ph": "s", "id": 76, "pid": 2338707, "tid": 2338707, "ts": 6267450440415.513, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450440420.968, "dur": 6.286, + "args": { + "External id": 455166,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450440423.053, "dur": 3.912, + "args": { + "External id": 455167,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450440429.540, "dur": 122.992, + "args": { + "External id": 455168,"Sequence number": 5036515, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450440456.095, "dur": 4.912, + "args": { + "External id": 455169,"Sequence number": 5036515, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450440456.943, "dur": 3.852, + "args": { + "External id": 455170,"Sequence number": 5036515, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14406 + } + }, + { + "ph": "s", "id": 75, "pid": 2338707, "tid": 2338707, "ts": 6267450440456.943, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450440461.981, "dur": 84.654, + "args": { + "External id": 455171,"Sequence number": 5036516, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14407 + } + }, + { + "ph": "s", "id": 74, "pid": 2338707, "tid": 2338707, "ts": 6267450440461.981, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450440549.317, "dur": 2.695, + "args": { + "External id": 455172,"Sequence number": 5036517, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14408 + } + }, + { + "ph": "s", "id": 73, "pid": 2338707, "tid": 2338707, "ts": 6267450440549.317, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450440562.849, "dur": 99.792, + "args": { + "External id": 455173,"Sequence number": 5036518, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450440563.357, "dur": 9.984, + "args": { + "External id": 455174,"Sequence number": 5036518, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14410 + } + }, + { + "ph": "s", "id": 72, "pid": 2338707, "tid": 2338707, "ts": 6267450440563.357, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450440564.939, "dur": 7.121, + "args": { + "External id": 455175,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450440568.489, "dur": 3.286, + "args": { + "External id": 455176,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450440573.862, "dur": 88.464, + "args": { + "External id": 455177,"Sequence number": 5036519, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450440574.862, "dur": 6.133, + "args": { + "External id": 455178,"Sequence number": 5036519, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450440575.433, "dur": 5.421, + "args": { + "External id": 455179,"Sequence number": 5036519, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14415 + } + }, + { + "ph": "s", "id": 71, "pid": 2338707, "tid": 2338707, "ts": 6267450440575.433, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450440583.440, "dur": 71.090, + "args": { + "External id": 455180,"Sequence number": 5036520, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14416 + } + }, + { + "ph": "s", "id": 70, "pid": 2338707, "tid": 2338707, "ts": 6267450440583.440, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450440657.370, "dur": 4.386, + "args": { + "External id": 455181,"Sequence number": 5036521, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14417 + } + }, + { + "ph": "s", "id": 69, "pid": 2338707, "tid": 2338707, "ts": 6267450440657.370, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450440689.151, "dur": 159.889, + "args": { + "External id": 455182,"Sequence number": 5036522, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14418 + } + }, + { + "ph": "s", "id": 68, "pid": 2338707, "tid": 2338707, "ts": 6267450440689.151, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450440729.650, "dur": 5.538, + "args": { + "External id": 455183,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450440768.464, "dur": 65.978, + "args": { + "External id": 455184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450440769.374, "dur": 5.814, + "args": { + "External id": 455185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450440770.638, "dur": 3.302, + "args": { + "External id": 455186,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450440772.426, "dur": 1.172, + "args": { + "External id": 455187,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450440775.995, "dur": 58.114, + "args": { + "External id": 455188,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450440777.693, "dur": 2.283, + "args": { + "External id": 455189,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450440778.752, "dur": 1.085, + "args": { + "External id": 455190,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450440782.909, "dur": 45.429, + "args": { + "External id": 455191,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450440830.467, "dur": 3.052, + "args": { + "External id": 455192,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267450440858.661, "dur": 25.625, + "args": { + "External id": 455193,"Sequence number": 5036523, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14429 + } + }, + { + "ph": "s", "id": 67, "pid": 2338707, "tid": 2338707, "ts": 6267450440858.661, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450440922.468, "dur": 240.843, + "args": { + "External id": 455194,"Sequence number": 5036524, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14430 + } + }, + { + "ph": "s", "id": 66, "pid": 2338707, "tid": 2338707, "ts": 6267450440922.468, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450440942.333, "dur": 2.777, + "args": { + "External id": 455195,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450440943.327, "dur": 1.379, + "args": { + "External id": 455196,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450440953.769, "dur": 9.457, + "args": { + "External id": 455197,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450440958.840, "dur": 4.001, + "args": { + "External id": 455198,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450440969.797, "dur": 4.257, + "args": { + "External id": 455199,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450441143.952, "dur": 4.219, + "args": { + "External id": 455200,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450441145.021, "dur": 2.779, + "args": { + "External id": 455201,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450441183.141, "dur": 107.341, + "args": { + "External id": 455202,"Sequence number": 5036525, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450441183.983, "dur": 11.223, + "args": { + "External id": 455203,"Sequence number": 5036525, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14439 + } + }, + { + "ph": "s", "id": 65, "pid": 2338707, "tid": 2338707, "ts": 6267450441183.983, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450441187.514, "dur": 6.214, + "args": { + "External id": 455204,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450441192.069, "dur": 1.451, + "args": { + "External id": 455205,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450441196.261, "dur": 93.919, + "args": { + "External id": 455206,"Sequence number": 5036526, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450441197.940, "dur": 3.808, + "args": { + "External id": 455207,"Sequence number": 5036526, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450441198.745, "dur": 2.875, + "args": { + "External id": 455208,"Sequence number": 5036526, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14444 + } + }, + { + "ph": "s", "id": 64, "pid": 2338707, "tid": 2338707, "ts": 6267450441198.745, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450441206.811, "dur": 77.328, + "args": { + "External id": 455209,"Sequence number": 5036527, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14445 + } + }, + { + "ph": "s", "id": 63, "pid": 2338707, "tid": 2338707, "ts": 6267450441206.811, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450441286.806, "dur": 2.625, + "args": { + "External id": 455210,"Sequence number": 5036528, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14446 + } + }, + { + "ph": "s", "id": 62, "pid": 2338707, "tid": 2338707, "ts": 6267450441286.806, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450441300.308, "dur": 60.416, + "args": { + "External id": 455211,"Sequence number": 5036529, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450441300.792, "dur": 5.385, + "args": { + "External id": 455212,"Sequence number": 5036529, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14448 + } + }, + { + "ph": "s", "id": 61, "pid": 2338707, "tid": 2338707, "ts": 6267450441300.792, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450441302.474, "dur": 2.459, + "args": { + "External id": 455213,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450441304.076, "dur": 0.703, + "args": { + "External id": 455214,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450441308.840, "dur": 51.658, + "args": { + "External id": 455215,"Sequence number": 5036530, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450441309.606, "dur": 3.346, + "args": { + "External id": 455216,"Sequence number": 5036530, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450441310.250, "dur": 2.584, + "args": { + "External id": 455217,"Sequence number": 5036530, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14453 + } + }, + { + "ph": "s", "id": 60, "pid": 2338707, "tid": 2338707, "ts": 6267450441310.250, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450441313.625, "dur": 42.509, + "args": { + "External id": 455218,"Sequence number": 5036531, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14454 + } + }, + { + "ph": "s", "id": 59, "pid": 2338707, "tid": 2338707, "ts": 6267450441313.625, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450441357.919, "dur": 2.243, + "args": { + "External id": 455219,"Sequence number": 5036532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14455 + } + }, + { + "ph": "s", "id": 58, "pid": 2338707, "tid": 2338707, "ts": 6267450441357.919, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450441371.210, "dur": 60.932, + "args": { + "External id": 455220,"Sequence number": 5036533, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450441371.701, "dur": 7.330, + "args": { + "External id": 455221,"Sequence number": 5036533, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14457 + } + }, + { + "ph": "s", "id": 57, "pid": 2338707, "tid": 2338707, "ts": 6267450441371.701, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450441373.390, "dur": 4.403, + "args": { + "External id": 455222,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450441374.836, "dur": 2.703, + "args": { + "External id": 455223,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450441379.720, "dur": 52.174, + "args": { + "External id": 455224,"Sequence number": 5036534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450441383.037, "dur": 4.221, + "args": { + "External id": 455225,"Sequence number": 5036534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450441383.741, "dur": 3.322, + "args": { + "External id": 455226,"Sequence number": 5036534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14462 + } + }, + { + "ph": "s", "id": 56, "pid": 2338707, "tid": 2338707, "ts": 6267450441383.741, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450441387.732, "dur": 37.984, + "args": { + "External id": 455227,"Sequence number": 5036535, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14463 + } + }, + { + "ph": "s", "id": 55, "pid": 2338707, "tid": 2338707, "ts": 6267450441387.732, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450441427.724, "dur": 3.810, + "args": { + "External id": 455228,"Sequence number": 5036536, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14464 + } + }, + { + "ph": "s", "id": 54, "pid": 2338707, "tid": 2338707, "ts": 6267450441427.724, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450441450.993, "dur": 3.994, + "args": { + "External id": 455229,"Sequence number": 5036537, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450441452.025, "dur": 2.795, + "args": { + "External id": 455230,"Sequence number": 5036537, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14466 + } + }, + { + "ph": "s", "id": 53, "pid": 2338707, "tid": 2338707, "ts": 6267450441452.025, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450441462.138, "dur": 4.055, + "args": { + "External id": 455231,"Sequence number": 5036538, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450441462.909, "dur": 3.162, + "args": { + "External id": 455232,"Sequence number": 5036538, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14468 + } + }, + { + "ph": "s", "id": 52, "pid": 2338707, "tid": 2338707, "ts": 6267450441462.909, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450441470.391, "dur": 4.370, + "args": { + "External id": 455233,"Sequence number": 5036539, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450441471.181, "dur": 3.466, + "args": { + "External id": 455234,"Sequence number": 5036539, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14470 + } + }, + { + "ph": "s", "id": 51, "pid": 2338707, "tid": 2338707, "ts": 6267450441471.181, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450441506.397, "dur": 178.551, + "args": { + "External id": 455235,"Sequence number": 5036540, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14471 + } + }, + { + "ph": "s", "id": 50, "pid": 2338707, "tid": 2338707, "ts": 6267450441506.397, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450441527.716, "dur": 10.039, + "args": { + "External id": 455236,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450441530.786, "dur": 6.487, + "args": { + "External id": 455237,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450441699.557, "dur": 105.575, + "args": { + "External id": 455238,"Sequence number": 5036541, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14474 + } + }, + { + "ph": "s", "id": 49, "pid": 2338707, "tid": 2338707, "ts": 6267450441699.557, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450441713.960, "dur": 7.963, + "args": { + "External id": 455239,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450441716.407, "dur": 5.155, + "args": { + "External id": 455240,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338707, "tid": 2338707, + "ts": 6267450441834.938, "dur": 178.202, + "args": { + "External id": 455241,"Sequence number": 5036542, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14477 + } + }, + { + "ph": "s", "id": 48, "pid": 2338707, "tid": 2338707, "ts": 6267450441834.938, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450441861.599, "dur": 126.033, + "args": { + "External id": 455242,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450441913.698, "dur": 6.434, + "args": { + "External id": 455243,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450441916.053, "dur": 3.629, + "args": { + "External id": 455244,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450441922.373, "dur": 3.716, + "args": { + "External id": 455245,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450441929.416, "dur": 1.103, + "args": { + "External id": 455246,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450441933.082, "dur": 3.098, + "args": { + "External id": 455247,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2338707, + "ts": 6267450441999.090, "dur": 5.032, + "args": { + "External id": 455248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450442018.355, "dur": 5.521, + "args": { + "External id": 455249,"Sequence number": 5036543, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442019.667, "dur": 4.061, + "args": { + "External id": 455250,"Sequence number": 5036543, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14486 + } + }, + { + "ph": "s", "id": 47, "pid": 2338707, "tid": 2338707, "ts": 6267450442019.667, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450442035.966, "dur": 155.987, + "args": { + "External id": 455251,"Sequence number": 5036544, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450442037.376, "dur": 10.399, + "args": { + "External id": 455252,"Sequence number": 5036544, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14488 + } + }, + { + "ph": "s", "id": 46, "pid": 2338707, "tid": 2338707, "ts": 6267450442037.376, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450442040.122, "dur": 6.224, + "args": { + "External id": 455253,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450442044.607, "dur": 1.495, + "args": { + "External id": 455254,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450442049.107, "dur": 142.504, + "args": { + "External id": 455255,"Sequence number": 5036545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450442050.948, "dur": 2.943, + "args": { + "External id": 455256,"Sequence number": 5036545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442051.558, "dur": 2.171, + "args": { + "External id": 455257,"Sequence number": 5036545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14493 + } + }, + { + "ph": "s", "id": 45, "pid": 2338707, "tid": 2338707, "ts": 6267450442051.558, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450442092.727, "dur": 89.107, + "args": { + "External id": 455258,"Sequence number": 5036546, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14494 + } + }, + { + "ph": "s", "id": 44, "pid": 2338707, "tid": 2338707, "ts": 6267450442092.727, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442184.961, "dur": 5.799, + "args": { + "External id": 455259,"Sequence number": 5036547, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14495 + } + }, + { + "ph": "s", "id": 43, "pid": 2338707, "tid": 2338707, "ts": 6267450442184.961, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450442230.973, "dur": 215.379, + "args": { + "External id": 455260,"Sequence number": 5036548, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14496 + } + }, + { + "ph": "s", "id": 42, "pid": 2338707, "tid": 2338707, "ts": 6267450442230.973, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450442251.722, "dur": 6.816, + "args": { + "External id": 455261,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442254.666, "dur": 3.716, + "args": { + "External id": 455262,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338707, "tid": 2338707, + "ts": 6267450442262.798, "dur": 2.927, + "args": { + "External id": 455263,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450442263.925, "dur": 1.697, + "args": { + "External id": 455264,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442264.598, "dur": 0.922, + "args": { + "External id": 455265,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450442273.882, "dur": 6.858, + "args": { + "External id": 455266,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450442275.899, "dur": 4.500, + "args": { + "External id": 455267,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450442289.320, "dur": 4.307, + "args": { + "External id": 455268,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450442297.029, "dur": 3.216, + "args": { + "External id": 455269,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450442425.349, "dur": 3.024, + "args": { + "External id": 455270,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442426.354, "dur": 1.773, + "args": { + "External id": 455271,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450442430.623, "dur": 2.224, + "args": { + "External id": 455272,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442431.485, "dur": 1.273, + "args": { + "External id": 455273,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450442463.777, "dur": 96.418, + "args": { + "External id": 455274,"Sequence number": 5036549, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450442465.019, "dur": 9.061, + "args": { + "External id": 455275,"Sequence number": 5036549, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14511 + } + }, + { + "ph": "s", "id": 41, "pid": 2338707, "tid": 2338707, "ts": 6267450442465.019, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450442469.476, "dur": 3.456, + "args": { + "External id": 455276,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450442471.276, "dur": 1.443, + "args": { + "External id": 455277,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450442475.218, "dur": 84.675, + "args": { + "External id": 455278,"Sequence number": 5036550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450442476.499, "dur": 5.677, + "args": { + "External id": 455279,"Sequence number": 5036550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442479.684, "dur": 2.368, + "args": { + "External id": 455280,"Sequence number": 5036550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14516 + } + }, + { + "ph": "s", "id": 40, "pid": 2338707, "tid": 2338707, "ts": 6267450442479.684, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450442483.069, "dur": 71.492, + "args": { + "External id": 455281,"Sequence number": 5036551, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14517 + } + }, + { + "ph": "s", "id": 39, "pid": 2338707, "tid": 2338707, "ts": 6267450442483.069, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442556.784, "dur": 2.592, + "args": { + "External id": 455282,"Sequence number": 5036552, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14518 + } + }, + { + "ph": "s", "id": 38, "pid": 2338707, "tid": 2338707, "ts": 6267450442556.784, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450442569.036, "dur": 99.325, + "args": { + "External id": 455283,"Sequence number": 5036553, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450442569.967, "dur": 11.720, + "args": { + "External id": 455284,"Sequence number": 5036553, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14520 + } + }, + { + "ph": "s", "id": 37, "pid": 2338707, "tid": 2338707, "ts": 6267450442569.967, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450442576.238, "dur": 4.209, + "args": { + "External id": 455285,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450442577.433, "dur": 2.839, + "args": { + "External id": 455286,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450442582.394, "dur": 85.689, + "args": { + "External id": 455287,"Sequence number": 5036554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450442583.515, "dur": 5.755, + "args": { + "External id": 455288,"Sequence number": 5036554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442584.137, "dur": 5.017, + "args": { + "External id": 455289,"Sequence number": 5036554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14525 + } + }, + { + "ph": "s", "id": 36, "pid": 2338707, "tid": 2338707, "ts": 6267450442584.137, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450442589.722, "dur": 70.370, + "args": { + "External id": 455290,"Sequence number": 5036555, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14526 + } + }, + { + "ph": "s", "id": 35, "pid": 2338707, "tid": 2338707, "ts": 6267450442589.722, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442662.733, "dur": 4.770, + "args": { + "External id": 455291,"Sequence number": 5036556, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14527 + } + }, + { + "ph": "s", "id": 34, "pid": 2338707, "tid": 2338707, "ts": 6267450442662.733, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450442691.368, "dur": 154.526, + "args": { + "External id": 455292,"Sequence number": 5036557, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14528 + } + }, + { + "ph": "s", "id": 33, "pid": 2338707, "tid": 2338707, "ts": 6267450442691.368, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450442732.676, "dur": 4.513, + "args": { + "External id": 455293,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450442768.935, "dur": 62.970, + "args": { + "External id": 455294,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450442769.674, "dur": 5.764, + "args": { + "External id": 455295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450442771.112, "dur": 3.175, + "args": { + "External id": 455296,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450442773.085, "dur": 1.065, + "args": { + "External id": 455297,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450442776.174, "dur": 55.421, + "args": { + "External id": 455298,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267450442777.402, "dur": 6.583, + "args": { + "External id": 455299,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442780.617, "dur": 3.273, + "args": { + "External id": 455300,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450442784.503, "dur": 44.041, + "args": { + "External id": 455301,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442830.135, "dur": 0.821, + "args": { + "External id": 455302,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267450442855.753, "dur": 24.196, + "args": { + "External id": 455303,"Sequence number": 5036558, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14539 + } + }, + { + "ph": "s", "id": 32, "pid": 2338707, "tid": 2338707, "ts": 6267450442855.753, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338707, "tid": 2338707, + "ts": 6267450442898.259, "dur": 44.875, + "args": { + "External id": 455304,"Sequence number": 5036559, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 14540 + } + }, + { + "ph": "s", "id": 31, "pid": 2338707, "tid": 2338707, "ts": 6267450442898.259, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338707, "tid": 2338707, + "ts": 6267450442906.325, "dur": 32.411, + "args": { + "External id": 455305,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 14541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450442940.284, "dur": 1.258, + "args": { + "External id": 455306,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 16384], []], "Ev Idx": 14542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267450442979.254, "dur": 52.816, + "args": { + "External id": 455307,"Record function id": 0, "Ev Idx": 14543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2338707, "tid": 2338707, + "ts": 6267450443033.200, "dur": 249.832, + "args": { + "External id": 455308,"Record function id": 0, "Ev Idx": 14544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450443110.885, "dur": 162.903, + "args": { + "External id": 455309,"Sequence number": 5036560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [67108864, 16384, 4096, 1]], "Input Dims": [[4096], [8, 4096, 4, 4096]], "Ev Idx": 14545 + } + }, + { + "ph": "s", "id": 30, "pid": 2338707, "tid": 2338707, "ts": 6267450443110.885, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267450443190.415, "dur": 41.696, + "args": { + "External id": 455310,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [131072, 4096], [4096], [131072], [], [], [], [], [], [], [], [], []], "Ev Idx": 14546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267450443375.324, "dur": 36.461, + "args": { + "External id": 455311,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450443378.376, "dur": 5.329, + "args": { + "External id": 455312,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443386.518, "dur": 24.993, + "args": { + "External id": 455313,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443389.550, "dur": 21.531, + "args": { + "External id": 455314,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267450443416.607, "dur": 18.910, + "args": { + "External id": 455315,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450443417.450, "dur": 2.557, + "args": { + "External id": 455316,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443420.608, "dur": 14.650, + "args": { + "External id": 455317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443423.543, "dur": 11.294, + "args": { + "External id": 455318,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267450443437.990, "dur": 18.135, + "args": { + "External id": 455319,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450443438.688, "dur": 4.446, + "args": { + "External id": 455320,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443443.813, "dur": 12.061, + "args": { + "External id": 455321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443444.514, "dur": 10.823, + "args": { + "External id": 455322,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450443466.398, "dur": 0.719, + "args": { + "External id": 455323,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 14559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338707, "tid": 2338707, + "ts": 6267450443475.729, "dur": 12.001, + "args": { + "External id": 455324,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 14560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443483.410, "dur": 2.253, + "args": { + "External id": 455325,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 14561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450443494.250, "dur": 7.173, + "args": { + "External id": 455326,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443498.408, "dur": 0.869, + "args": { + "External id": 455327,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450443502.697, "dur": 2.896, + "args": { + "External id": 455328,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443504.232, "dur": 0.593, + "args": { + "External id": 455329,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450443506.868, "dur": 2.608, + "args": { + "External id": 455330,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443508.382, "dur": 0.573, + "args": { + "External id": 455331,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450443512.493, "dur": 4.941, + "args": { + "External id": 455332,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 14568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443514.017, "dur": 2.833, + "args": { + "External id": 455333,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 14569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450443518.427, "dur": 2.601, + "args": { + "External id": 455334,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 14570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443519.713, "dur": 0.690, + "args": { + "External id": 455335,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 14571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450443521.972, "dur": 2.562, + "args": { + "External id": 455336,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4096, 4], [], [], [], []], "Ev Idx": 14572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443523.396, "dur": 0.627, + "args": { + "External id": 455337,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 14573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450443529.057, "dur": 4.720, + "args": { + "External id": 455338,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4096, 4], [], []], "Ev Idx": 14574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443532.461, "dur": 0.522, + "args": { + "External id": 455339,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 14575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450443537.871, "dur": 2.708, + "args": { + "External id": 455340,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 14576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443539.252, "dur": 0.769, + "args": { + "External id": 455341,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 14577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267450443544.302, "dur": 8.060, + "args": { + "External id": 455342,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 14578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443550.802, "dur": 0.550, + "args": { + "External id": 455343,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 14579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450443553.786, "dur": 2.412, + "args": { + "External id": 455344,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 14580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443554.907, "dur": 0.743, + "args": { + "External id": 455345,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 14581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450443558.781, "dur": 5.256, + "args": { + "External id": 455346,"Sequence number": 5036561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 14582 + } + }, + { + "ph": "s", "id": 29, "pid": 2338707, "tid": 2338707, "ts": 6267450443558.781, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443561.832, "dur": 0.673, + "args": { + "External id": 455347,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450443567.264, "dur": 6.317, + "args": { + "External id": 455348,"Sequence number": 5036562, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 14584 + } + }, + { + "ph": "s", "id": 28, "pid": 2338707, "tid": 2338707, "ts": 6267450443567.264, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443569.833, "dur": 2.993, + "args": { + "External id": 455349,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267450443574.558, "dur": 4.563, + "args": { + "External id": 455350,"Sequence number": 5036563, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 14586 + } + }, + { + "ph": "s", "id": 27, "pid": 2338707, "tid": 2338707, "ts": 6267450443574.558, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443577.814, "dur": 0.490, + "args": { + "External id": 455351,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450443580.268, "dur": 5.092, + "args": { + "External id": 455352,"Sequence number": 5036564, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 14588 + } + }, + { + "ph": "s", "id": 26, "pid": 2338707, "tid": 2338707, "ts": 6267450443580.268, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443583.573, "dur": 1.037, + "args": { + "External id": 455353,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 14589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267450443589.075, "dur": 61.213, + "args": { + "External id": 455354,"Sequence number": 5036565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267450443608.591, "dur": 41.493, + "args": { + "External id": 455355,"Sequence number": 5036565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450443612.366, "dur": 7.597, + "args": { + "External id": 455356,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 14592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450443614.445, "dur": 4.637, + "args": { + "External id": 455357,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443621.520, "dur": 28.038, + "args": { + "External id": 455358,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 14594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450443676.063, "dur": 6.150, + "args": { + "External id": 455359,"Sequence number": 5036565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14595 + } + }, + { + "ph": "s", "id": 25, "pid": 2338707, "tid": 2338707, "ts": 6267450443676.063, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450443684.487, "dur": 1.169, + "args": { + "External id": 455360,"Sequence number": 5036566, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450443715.236, "dur": 137268.189, + "args": { + "External id": 455361,"Sequence number": 5036566, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 14597 + } + }, + { + "ph": "s", "id": 24, "pid": 2338707, "tid": 2338707, "ts": 6267450443715.236, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267450443732.670, "dur": 28.351, + "args": { + "External id": 455362,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267450443733.407, "dur": 27.434, + "args": { + "External id": 455363,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450443734.517, "dur": 5.464, + "args": { + "External id": 455364,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450443735.672, "dur": 3.942, + "args": { + "External id": 455365,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443740.937, "dur": 19.304, + "args": { + "External id": 455366,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 14602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450443780.224, "dur": 24.749, + "args": { + "External id": 455367,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450443781.387, "dur": 6.070, + "args": { + "External id": 455368,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443783.504, "dur": 3.586, + "args": { + "External id": 455369,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443788.524, "dur": 16.244, + "args": { + "External id": 455370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 14606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443789.898, "dur": 14.537, + "args": { + "External id": 455371,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450443808.544, "dur": 20.802, + "args": { + "External id": 455372,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450443809.343, "dur": 8.132, + "args": { + "External id": 455373,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443812.527, "dur": 4.739, + "args": { + "External id": 455374,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443818.043, "dur": 11.111, + "args": { + "External id": 455375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443818.532, "dur": 10.322, + "args": { + "External id": 455376,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 14612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267450443834.126, "dur": 17.145, + "args": { + "External id": 455377,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450443837.216, "dur": 2.869, + "args": { + "External id": 455378,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443840.780, "dur": 10.227, + "args": { + "External id": 455379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 14615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443841.392, "dur": 9.331, + "args": { + "External id": 455380,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2338707, + "ts": 6267450443860.825, "dur": 29.106, + "args": { + "External id": 455381,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450443894.218, "dur": 71.994, + "args": { + "External id": 455382,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450443897.452, "dur": 68.321, + "args": { + "External id": 455383,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443903.465, "dur": 1.106, + "args": { + "External id": 455384,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 14620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450443906.125, "dur": 35.674, + "args": { + "External id": 455385,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450443907.972, "dur": 33.636, + "args": { + "External id": 455386,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 14622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450443922.464, "dur": 2.854, + "args": { + "External id": 455387,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450443926.238, "dur": 14.889, + "args": { + "External id": 455388,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 14624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2338707, + "ts": 6267450443971.907, "dur": 130746.237, + "args": { + "External id": 455389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2338707, + "ts": 6267450443973.941, "dur": 130741.800, + "args": { + "External id": 455390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450574739.897, "dur": 15.201, + "args": { + "External id": 455391,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450574750.485, "dur": 1.938, + "args": { + "External id": 455392,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450574761.904, "dur": 132.789, + "args": { + "External id": 455393,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450574763.765, "dur": 6.962, + "args": { + "External id": 455394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450574766.604, "dur": 3.154, + "args": { + "External id": 455395,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450574768.514, "dur": 0.951, + "args": { + "External id": 455396,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450574772.128, "dur": 121.662, + "args": { + "External id": 455397,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450574780.067, "dur": 112.731, + "args": { + "External id": 455398,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450574899.030, "dur": 4.897, + "args": { + "External id": 455399,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450574901.346, "dur": 0.911, + "args": { + "External id": 455400,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450574914.771, "dur": 9.371, + "args": { + "External id": 455401,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450574938.623, "dur": 7.684, + "args": { + "External id": 455402,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450574941.333, "dur": 4.692, + "args": { + "External id": 455403,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450575137.290, "dur": 249.201, + "args": { + "External id": 455404,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450575142.912, "dur": 3.405, + "args": { + "External id": 455405,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450575149.438, "dur": 236.454, + "args": { + "External id": 455406,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450575152.690, "dur": 0.795, + "args": { + "External id": 455407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450575156.164, "dur": 32.715, + "args": { + "External id": 455408,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450575190.739, "dur": 6.296, + "args": { + "External id": 455409,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450575195.434, "dur": 1.000, + "args": { + "External id": 455410,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450575198.806, "dur": 29.580, + "args": { + "External id": 455411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450575199.926, "dur": 1.401, + "args": { + "External id": 455412,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450575206.061, "dur": 22.033, + "args": { + "External id": 455413,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450575211.099, "dur": 2.942, + "args": { + "External id": 455414,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450575235.196, "dur": 27.931, + "args": { + "External id": 455415,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450575266.021, "dur": 18.235, + "args": { + "External id": 455416,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450575288.613, "dur": 18.605, + "args": { + "External id": 455417,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450575309.887, "dur": 18.059, + "args": { + "External id": 455418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450575330.572, "dur": 25.711, + "args": { + "External id": 455419,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450575333.123, "dur": 1.863, + "args": { + "External id": 455420,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450575339.212, "dur": 0.596, + "args": { + "External id": 455421,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450575358.675, "dur": 13.430, + "args": { + "External id": 455422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450575373.698, "dur": 11.194, + "args": { + "External id": 455423,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450575395.253, "dur": 2.836, + "args": { + "External id": 455424,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450575406.606, "dur": 4.570, + "args": { + "External id": 455425,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450575409.461, "dur": 0.679, + "args": { + "External id": 455426,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450575501.639, "dur": 83.214, + "args": { + "External id": 455427,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450575591.236, "dur": 24.305, + "args": { + "External id": 455428,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450575612.301, "dur": 1.147, + "args": { + "External id": 455429,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450575617.246, "dur": 34.757, + "args": { + "External id": 455430,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450575659.251, "dur": 8.810, + "args": { + "External id": 455431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450575661.141, "dur": 6.179, + "args": { + "External id": 455432,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450575663.329, "dur": 3.716, + "args": { + "External id": 455433,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450575672.815, "dur": 60.179, + "args": { + "External id": 455434,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450575677.013, "dur": 55.483, + "args": { + "External id": 455435,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450575738.348, "dur": 18.409, + "args": { + "External id": 455436,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450575764.444, "dur": 4.155, + "args": { + "External id": 455437,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450575766.668, "dur": 0.858, + "args": { + "External id": 455438,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450575773.541, "dur": 50.755, + "args": { + "External id": 455439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450575774.620, "dur": 6.473, + "args": { + "External id": 455440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450575775.868, "dur": 4.445, + "args": { + "External id": 455441,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450575779.487, "dur": 0.681, + "args": { + "External id": 455442,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450575781.629, "dur": 42.091, + "args": { + "External id": 455443,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450575782.230, "dur": 40.894, + "args": { + "External id": 455444,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450575828.774, "dur": 5.976, + "args": { + "External id": 455445,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450575830.603, "dur": 2.851, + "args": { + "External id": 455446,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450575841.254, "dur": 2.079, + "args": { + "External id": 455447,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450575855.821, "dur": 6.642, + "args": { + "External id": 455448,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450575858.322, "dur": 3.894, + "args": { + "External id": 455449,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450575969.903, "dur": 256.691, + "args": { + "External id": 455450,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450575971.774, "dur": 1.758, + "args": { + "External id": 455451,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450575975.882, "dur": 250.253, + "args": { + "External id": 455452,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450575977.955, "dur": 0.393, + "args": { + "External id": 455453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450575980.228, "dur": 27.875, + "args": { + "External id": 455454,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450576012.318, "dur": 3.078, + "args": { + "External id": 455455,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450576014.235, "dur": 0.889, + "args": { + "External id": 455456,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450576016.758, "dur": 25.552, + "args": { + "External id": 455457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450576017.712, "dur": 1.360, + "args": { + "External id": 455458,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450576020.535, "dur": 21.356, + "args": { + "External id": 455459,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576025.391, "dur": 2.356, + "args": { + "External id": 455460,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450576043.988, "dur": 71.507, + "args": { + "External id": 455461,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576118.312, "dur": 16.314, + "args": { + "External id": 455462,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450576138.076, "dur": 15.247, + "args": { + "External id": 455463,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576155.376, "dur": 13.829, + "args": { + "External id": 455464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450576171.299, "dur": 27.549, + "args": { + "External id": 455465,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576175.943, "dur": 3.388, + "args": { + "External id": 455466,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450576181.296, "dur": 0.812, + "args": { + "External id": 455467,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576200.483, "dur": 12.850, + "args": { + "External id": 455468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576214.434, "dur": 10.408, + "args": { + "External id": 455469,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450576236.369, "dur": 2.754, + "args": { + "External id": 455470,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450576249.656, "dur": 4.146, + "args": { + "External id": 455471,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450576252.362, "dur": 0.636, + "args": { + "External id": 455472,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450576336.374, "dur": 66.003, + "args": { + "External id": 455473,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450576408.366, "dur": 4.958, + "args": { + "External id": 455474,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450576411.048, "dur": 0.963, + "args": { + "External id": 455475,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576414.841, "dur": 25.795, + "args": { + "External id": 455476,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450576445.482, "dur": 7.641, + "args": { + "External id": 455477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450576447.193, "dur": 5.003, + "args": { + "External id": 455478,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450576451.016, "dur": 0.938, + "args": { + "External id": 455479,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450576456.359, "dur": 43.708, + "args": { + "External id": 455480,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450576457.806, "dur": 41.463, + "args": { + "External id": 455481,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576504.445, "dur": 14.407, + "args": { + "External id": 455482,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450576524.910, "dur": 3.905, + "args": { + "External id": 455483,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450576527.044, "dur": 0.866, + "args": { + "External id": 455484,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450576533.352, "dur": 49.855, + "args": { + "External id": 455485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450576536.308, "dur": 5.471, + "args": { + "External id": 455486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450576537.199, "dur": 3.914, + "args": { + "External id": 455487,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450576540.390, "dur": 0.573, + "args": { + "External id": 455488,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450576542.533, "dur": 40.245, + "args": { + "External id": 455489,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450576543.301, "dur": 38.602, + "args": { + "External id": 455490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450576587.594, "dur": 21.736, + "args": { + "External id": 455491,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450576589.457, "dur": 17.588, + "args": { + "External id": 455492,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450576620.914, "dur": 1.924, + "args": { + "External id": 455493,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450576632.443, "dur": 7.427, + "args": { + "External id": 455494,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450576634.517, "dur": 5.088, + "args": { + "External id": 455495,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450576747.983, "dur": 268.149, + "args": { + "External id": 455496,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450576750.393, "dur": 1.768, + "args": { + "External id": 455497,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450576754.010, "dur": 261.464, + "args": { + "External id": 455498,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450576761.306, "dur": 0.356, + "args": { + "External id": 455499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450576762.995, "dur": 23.477, + "args": { + "External id": 455500,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450576788.108, "dur": 3.709, + "args": { + "External id": 455501,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450576790.465, "dur": 1.058, + "args": { + "External id": 455502,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450576792.677, "dur": 25.293, + "args": { + "External id": 455503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450576795.936, "dur": 1.330, + "args": { + "External id": 455504,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450576798.849, "dur": 18.796, + "args": { + "External id": 455505,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576802.120, "dur": 2.694, + "args": { + "External id": 455506,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450576819.782, "dur": 24.157, + "args": { + "External id": 455507,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576845.519, "dur": 30.228, + "args": { + "External id": 455508,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450576881.077, "dur": 38.567, + "args": { + "External id": 455509,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576921.350, "dur": 39.594, + "args": { + "External id": 455510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450576962.767, "dur": 21.660, + "args": { + "External id": 455511,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576964.926, "dur": 1.849, + "args": { + "External id": 455512,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450576968.420, "dur": 0.715, + "args": { + "External id": 455513,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450576985.944, "dur": 13.428, + "args": { + "External id": 455514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450577003.092, "dur": 11.235, + "args": { + "External id": 455515,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450577023.573, "dur": 1.772, + "args": { + "External id": 455516,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450577035.686, "dur": 3.645, + "args": { + "External id": 455517,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450577038.037, "dur": 0.554, + "args": { + "External id": 455518,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450577156.998, "dur": 68.200, + "args": { + "External id": 455519,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450577232.264, "dur": 8.261, + "args": { + "External id": 455520,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450577235.602, "dur": 3.276, + "args": { + "External id": 455521,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450577242.225, "dur": 29.659, + "args": { + "External id": 455522,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450577279.029, "dur": 5.701, + "args": { + "External id": 455523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450577281.132, "dur": 2.920, + "args": { + "External id": 455524,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450577283.031, "dur": 0.793, + "args": { + "External id": 455525,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450577287.672, "dur": 44.003, + "args": { + "External id": 455526,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450577288.799, "dur": 42.225, + "args": { + "External id": 455527,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450577335.921, "dur": 14.959, + "args": { + "External id": 455528,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450577357.243, "dur": 5.752, + "args": { + "External id": 455529,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450577361.288, "dur": 0.737, + "args": { + "External id": 455530,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450577367.942, "dur": 47.830, + "args": { + "External id": 455531,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450577369.197, "dur": 3.928, + "args": { + "External id": 455532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450577370.205, "dur": 2.333, + "args": { + "External id": 455533,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450577371.559, "dur": 0.820, + "args": { + "External id": 455534,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450577373.822, "dur": 41.478, + "args": { + "External id": 455535,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450577376.787, "dur": 37.859, + "args": { + "External id": 455536,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450577419.924, "dur": 6.684, + "args": { + "External id": 455537,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450577421.717, "dur": 3.642, + "args": { + "External id": 455538,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450577433.823, "dur": 1.764, + "args": { + "External id": 455539,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450577444.093, "dur": 6.827, + "args": { + "External id": 455540,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450577446.368, "dur": 4.258, + "args": { + "External id": 455541,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450577544.099, "dur": 214.085, + "args": { + "External id": 455542,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450577546.271, "dur": 4.218, + "args": { + "External id": 455543,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450577552.019, "dur": 205.382, + "args": { + "External id": 455544,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450577553.384, "dur": 0.418, + "args": { + "External id": 455545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450577555.073, "dur": 20.058, + "args": { + "External id": 455546,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450577576.928, "dur": 5.005, + "args": { + "External id": 455547,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450577580.988, "dur": 0.763, + "args": { + "External id": 455548,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450577582.993, "dur": 42.040, + "args": { + "External id": 455549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450577584.090, "dur": 1.055, + "args": { + "External id": 455550,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450577588.624, "dur": 36.022, + "args": { + "External id": 455551,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450577607.584, "dur": 3.493, + "args": { + "External id": 455552,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450577626.904, "dur": 23.229, + "args": { + "External id": 455553,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450577651.749, "dur": 15.809, + "args": { + "External id": 455554,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450577670.230, "dur": 13.614, + "args": { + "External id": 455555,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450577685.526, "dur": 13.643, + "args": { + "External id": 455556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450577701.380, "dur": 24.232, + "args": { + "External id": 455557,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450577703.783, "dur": 1.801, + "args": { + "External id": 455558,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450577707.540, "dur": 2.766, + "args": { + "External id": 455559,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450577729.027, "dur": 14.475, + "args": { + "External id": 455560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450577744.545, "dur": 11.731, + "args": { + "External id": 455561,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450577766.309, "dur": 2.474, + "args": { + "External id": 455562,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450577778.627, "dur": 4.010, + "args": { + "External id": 455563,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450577781.173, "dur": 0.460, + "args": { + "External id": 455564,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450577856.520, "dur": 59.816, + "args": { + "External id": 455565,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450577921.506, "dur": 4.788, + "args": { + "External id": 455566,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450577924.388, "dur": 0.861, + "args": { + "External id": 455567,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450577930.222, "dur": 26.510, + "args": { + "External id": 455568,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450577961.401, "dur": 6.193, + "args": { + "External id": 455569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450577962.853, "dur": 3.919, + "args": { + "External id": 455570,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450577965.047, "dur": 1.517, + "args": { + "External id": 455571,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450577970.008, "dur": 42.866, + "args": { + "External id": 455572,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450577971.145, "dur": 41.026, + "args": { + "External id": 455573,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450578016.948, "dur": 14.877, + "args": { + "External id": 455574,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450578039.995, "dur": 2.972, + "args": { + "External id": 455575,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450578041.708, "dur": 0.508, + "args": { + "External id": 455576,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450578046.912, "dur": 89.648, + "args": { + "External id": 455577,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450578047.892, "dur": 3.714, + "args": { + "External id": 455578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450578049.177, "dur": 1.842, + "args": { + "External id": 455579,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450578050.216, "dur": 0.656, + "args": { + "External id": 455580,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450578089.821, "dur": 46.250, + "args": { + "External id": 455581,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450578090.691, "dur": 44.630, + "args": { + "External id": 455582,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450578142.558, "dur": 6.626, + "args": { + "External id": 455583,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450578144.511, "dur": 3.300, + "args": { + "External id": 455584,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450578156.248, "dur": 1.922, + "args": { + "External id": 455585,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450578167.402, "dur": 9.183, + "args": { + "External id": 455586,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450578172.198, "dur": 4.111, + "args": { + "External id": 455587,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450578267.064, "dur": 192.413, + "args": { + "External id": 455588,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450578271.444, "dur": 1.856, + "args": { + "External id": 455589,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450578275.053, "dur": 183.805, + "args": { + "External id": 455590,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450578276.370, "dur": 0.541, + "args": { + "External id": 455591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450578278.242, "dur": 21.719, + "args": { + "External id": 455592,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450578301.669, "dur": 5.486, + "args": { + "External id": 455593,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450578306.041, "dur": 0.870, + "args": { + "External id": 455594,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450578308.237, "dur": 23.032, + "args": { + "External id": 455595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450578311.427, "dur": 1.043, + "args": { + "External id": 455596,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450578313.790, "dur": 17.164, + "args": { + "External id": 455597,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450578316.912, "dur": 2.842, + "args": { + "External id": 455598,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450578332.788, "dur": 22.327, + "args": { + "External id": 455599,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450578356.522, "dur": 12.258, + "args": { + "External id": 455600,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450578371.745, "dur": 14.206, + "args": { + "External id": 455601,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450578387.475, "dur": 11.627, + "args": { + "External id": 455602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450578400.791, "dur": 24.493, + "args": { + "External id": 455603,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450578404.976, "dur": 2.301, + "args": { + "External id": 455604,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450578411.740, "dur": 0.540, + "args": { + "External id": 455605,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450578426.664, "dur": 11.207, + "args": { + "External id": 455606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450578438.844, "dur": 18.941, + "args": { + "External id": 455607,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450578466.390, "dur": 1.656, + "args": { + "External id": 455608,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450578476.714, "dur": 3.621, + "args": { + "External id": 455609,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450578478.973, "dur": 0.581, + "args": { + "External id": 455610,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450578549.610, "dur": 69.372, + "args": { + "External id": 455611,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450578628.365, "dur": 6.295, + "args": { + "External id": 455612,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450578631.991, "dur": 1.165, + "args": { + "External id": 455613,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450578635.909, "dur": 25.187, + "args": { + "External id": 455614,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450578666.264, "dur": 6.962, + "args": { + "External id": 455615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450578668.142, "dur": 4.289, + "args": { + "External id": 455616,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450578670.602, "dur": 1.634, + "args": { + "External id": 455617,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450578678.959, "dur": 47.227, + "args": { + "External id": 455618,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450578680.389, "dur": 45.059, + "args": { + "External id": 455619,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450578730.329, "dur": 15.520, + "args": { + "External id": 455620,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450578751.811, "dur": 4.247, + "args": { + "External id": 455621,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450578754.176, "dur": 0.970, + "args": { + "External id": 455622,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450578760.362, "dur": 46.354, + "args": { + "External id": 455623,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450578761.054, "dur": 6.243, + "args": { + "External id": 455624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450578764.273, "dur": 2.394, + "args": { + "External id": 455625,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450578766.049, "dur": 0.485, + "args": { + "External id": 455626,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450578767.777, "dur": 38.467, + "args": { + "External id": 455627,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450578768.216, "dur": 37.503, + "args": { + "External id": 455628,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450578810.849, "dur": 6.811, + "args": { + "External id": 455629,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450578813.384, "dur": 3.099, + "args": { + "External id": 455630,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450578824.050, "dur": 1.501, + "args": { + "External id": 455631,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450578854.262, "dur": 8.392, + "args": { + "External id": 455632,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450578856.360, "dur": 5.994, + "args": { + "External id": 455633,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450578948.583, "dur": 222.181, + "args": { + "External id": 455634,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450578950.487, "dur": 2.196, + "args": { + "External id": 455635,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450578954.267, "dur": 215.683, + "args": { + "External id": 455636,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450578955.984, "dur": 0.353, + "args": { + "External id": 455637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450578959.990, "dur": 22.180, + "args": { + "External id": 455638,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450578983.947, "dur": 2.690, + "args": { + "External id": 455639,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450578985.650, "dur": 0.819, + "args": { + "External id": 455640,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450578987.505, "dur": 21.952, + "args": { + "External id": 455641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450578988.522, "dur": 1.418, + "args": { + "External id": 455642,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450578991.313, "dur": 17.790, + "args": { + "External id": 455643,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450578995.793, "dur": 2.136, + "args": { + "External id": 455644,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450579010.628, "dur": 21.863, + "args": { + "External id": 455645,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579034.034, "dur": 13.110, + "args": { + "External id": 455646,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450579049.610, "dur": 52.209, + "args": { + "External id": 455647,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579104.403, "dur": 12.480, + "args": { + "External id": 455648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450579123.733, "dur": 20.438, + "args": { + "External id": 455649,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579125.895, "dur": 2.541, + "args": { + "External id": 455650,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450579130.117, "dur": 0.693, + "args": { + "External id": 455651,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579145.756, "dur": 11.515, + "args": { + "External id": 455652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579158.630, "dur": 10.261, + "args": { + "External id": 455653,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450579179.658, "dur": 2.368, + "args": { + "External id": 455654,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450579191.231, "dur": 3.686, + "args": { + "External id": 455655,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450579193.797, "dur": 0.327, + "args": { + "External id": 455656,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450579269.304, "dur": 60.031, + "args": { + "External id": 455657,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450579334.955, "dur": 7.025, + "args": { + "External id": 455658,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450579337.523, "dur": 3.125, + "args": { + "External id": 455659,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579343.596, "dur": 26.122, + "args": { + "External id": 455660,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450579374.269, "dur": 7.560, + "args": { + "External id": 455661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450579376.198, "dur": 4.709, + "args": { + "External id": 455662,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450579379.858, "dur": 0.910, + "args": { + "External id": 455663,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450579384.775, "dur": 43.160, + "args": { + "External id": 455664,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450579385.750, "dur": 41.653, + "args": { + "External id": 455665,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579431.900, "dur": 14.711, + "args": { + "External id": 455666,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450579452.549, "dur": 3.423, + "args": { + "External id": 455667,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450579454.449, "dur": 0.620, + "args": { + "External id": 455668,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450579460.325, "dur": 46.558, + "args": { + "External id": 455669,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450579463.625, "dur": 2.931, + "args": { + "External id": 455670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450579464.400, "dur": 1.496, + "args": { + "External id": 455671,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450579465.181, "dur": 0.602, + "args": { + "External id": 455672,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450579467.093, "dur": 39.299, + "args": { + "External id": 455673,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450579467.733, "dur": 38.132, + "args": { + "External id": 455674,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450579511.305, "dur": 5.593, + "args": { + "External id": 455675,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450579512.853, "dur": 2.809, + "args": { + "External id": 455676,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450579524.389, "dur": 1.443, + "args": { + "External id": 455677,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450579533.852, "dur": 6.202, + "args": { + "External id": 455678,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450579535.890, "dur": 3.817, + "args": { + "External id": 455679,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450579645.047, "dur": 220.578, + "args": { + "External id": 455680,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450579675.325, "dur": 5.302, + "args": { + "External id": 455681,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450579682.861, "dur": 182.319, + "args": { + "External id": 455682,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450579686.136, "dur": 0.343, + "args": { + "External id": 455683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450579687.531, "dur": 22.094, + "args": { + "External id": 455684,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450579711.484, "dur": 5.661, + "args": { + "External id": 455685,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450579716.144, "dur": 0.766, + "args": { + "External id": 455686,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450579718.082, "dur": 21.005, + "args": { + "External id": 455687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450579718.985, "dur": 1.468, + "args": { + "External id": 455688,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450579721.835, "dur": 16.984, + "args": { + "External id": 455689,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579724.460, "dur": 2.830, + "args": { + "External id": 455690,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450579740.437, "dur": 22.459, + "args": { + "External id": 455691,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579764.557, "dur": 12.743, + "args": { + "External id": 455692,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450579782.392, "dur": 14.781, + "args": { + "External id": 455693,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579798.572, "dur": 11.275, + "args": { + "External id": 455694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450579811.711, "dur": 22.741, + "args": { + "External id": 455695,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579815.474, "dur": 1.968, + "args": { + "External id": 455696,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450579819.310, "dur": 0.565, + "args": { + "External id": 455697,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579835.865, "dur": 13.034, + "args": { + "External id": 455698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450579852.656, "dur": 11.678, + "args": { + "External id": 455699,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450579873.789, "dur": 2.174, + "args": { + "External id": 455700,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450579885.989, "dur": 3.471, + "args": { + "External id": 455701,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450579888.260, "dur": 0.376, + "args": { + "External id": 455702,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450579958.616, "dur": 65.784, + "args": { + "External id": 455703,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450580029.573, "dur": 4.160, + "args": { + "External id": 455704,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450580031.967, "dur": 0.508, + "args": { + "External id": 455705,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450580035.402, "dur": 62.704, + "args": { + "External id": 455706,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450580107.598, "dur": 7.440, + "args": { + "External id": 455707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450580109.433, "dur": 4.721, + "args": { + "External id": 455708,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450580111.973, "dur": 1.932, + "args": { + "External id": 455709,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450580118.410, "dur": 55.394, + "args": { + "External id": 455710,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450580119.567, "dur": 53.560, + "args": { + "External id": 455711,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450580178.726, "dur": 18.459, + "args": { + "External id": 455712,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450580204.305, "dur": 6.884, + "args": { + "External id": 455713,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450580209.347, "dur": 0.660, + "args": { + "External id": 455714,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450580215.789, "dur": 50.358, + "args": { + "External id": 455715,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450580216.795, "dur": 3.972, + "args": { + "External id": 455716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450580217.679, "dur": 2.496, + "args": { + "External id": 455717,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450580218.834, "dur": 1.179, + "args": { + "External id": 455718,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450580221.656, "dur": 44.046, + "args": { + "External id": 455719,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450580224.705, "dur": 40.270, + "args": { + "External id": 455720,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450580270.594, "dur": 6.274, + "args": { + "External id": 455721,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450580272.260, "dur": 3.296, + "args": { + "External id": 455722,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450580284.660, "dur": 1.879, + "args": { + "External id": 455723,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450580295.762, "dur": 6.630, + "args": { + "External id": 455724,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450580297.946, "dur": 4.169, + "args": { + "External id": 455725,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450580395.958, "dur": 193.737, + "args": { + "External id": 455726,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450580400.415, "dur": 1.775, + "args": { + "External id": 455727,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450580403.822, "dur": 185.386, + "args": { + "External id": 455728,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450580405.416, "dur": 0.349, + "args": { + "External id": 455729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450580407.094, "dur": 24.051, + "args": { + "External id": 455730,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450580432.841, "dur": 5.605, + "args": { + "External id": 455731,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450580437.229, "dur": 1.007, + "args": { + "External id": 455732,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450580439.526, "dur": 23.999, + "args": { + "External id": 455733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450580440.516, "dur": 1.418, + "args": { + "External id": 455734,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450580445.721, "dur": 17.527, + "args": { + "External id": 455735,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450580448.291, "dur": 2.310, + "args": { + "External id": 455736,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450580465.334, "dur": 24.548, + "args": { + "External id": 455737,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450580491.334, "dur": 13.421, + "args": { + "External id": 455738,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450580507.488, "dur": 12.888, + "args": { + "External id": 455739,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450580521.912, "dur": 11.920, + "args": { + "External id": 455740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450580535.721, "dur": 20.024, + "args": { + "External id": 455741,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450580537.998, "dur": 2.072, + "args": { + "External id": 455742,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450580541.863, "dur": 0.404, + "args": { + "External id": 455743,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450580559.568, "dur": 12.367, + "args": { + "External id": 455744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450580573.318, "dur": 14.770, + "args": { + "External id": 455745,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450580613.910, "dur": 2.348, + "args": { + "External id": 455746,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450580626.753, "dur": 4.139, + "args": { + "External id": 455747,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450580629.333, "dur": 0.648, + "args": { + "External id": 455748,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450580699.658, "dur": 57.640, + "args": { + "External id": 455749,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450580762.555, "dur": 4.577, + "args": { + "External id": 455750,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450580765.293, "dur": 0.552, + "args": { + "External id": 455751,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450580770.686, "dur": 25.077, + "args": { + "External id": 455752,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450580800.311, "dur": 5.551, + "args": { + "External id": 455753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450580802.049, "dur": 3.145, + "args": { + "External id": 455754,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450580803.707, "dur": 1.271, + "args": { + "External id": 455755,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450580808.803, "dur": 41.726, + "args": { + "External id": 455756,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450580810.369, "dur": 39.565, + "args": { + "External id": 455757,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450580854.171, "dur": 14.766, + "args": { + "External id": 455758,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450580876.158, "dur": 25.685, + "args": { + "External id": 455759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450580878.545, "dur": 22.796, + "args": { + "External id": 455760,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450580885.831, "dur": 0.451, + "args": { + "External id": 455761,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 14997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450580907.800, "dur": 27.002, + "args": { + "External id": 455762,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450580910.196, "dur": 24.363, + "args": { + "External id": 455763,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 14999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450580914.731, "dur": 3.653, + "args": { + "External id": 455764,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450580919.623, "dur": 14.402, + "args": { + "External id": 455765,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450580949.570, "dur": 7.936, + "args": { + "External id": 455766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450580953.805, "dur": 3.403, + "args": { + "External id": 455767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450580958.655, "dur": 1.108, + "args": { + "External id": 455768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450580959.214, "dur": 0.480, + "args": { + "External id": 455769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450581001.964, "dur": 29.142, + "args": { + "External id": 455770,"Sequence number": 5036567, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15006 + } + }, + { + "ph": "s", "id": 23, "pid": 2338707, "tid": 2338707, "ts": 6267450581001.964, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450581036.816, "dur": 5.907, + "args": { + "External id": 455771,"Sequence number": 5036568, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450581039.909, "dur": 1.236, + "args": { + "External id": 455772,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267450581045.145, "dur": 8.348, + "args": { + "External id": 455773,"Sequence number": 5036568, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450581051.916, "dur": 0.347, + "args": { + "External id": 455774,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450581054.594, "dur": 40.724, + "args": { + "External id": 455775,"Sequence number": 5036568, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450581092.936, "dur": 0.996, + "args": { + "External id": 455776,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450581101.703, "dur": 6.742, + "args": { + "External id": 455777,"Sequence number": 5036568, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15013 + } + }, + { + "ph": "s", "id": 22, "pid": 2338707, "tid": 2338707, "ts": 6267450581101.703, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450581105.557, "dur": 0.939, + "args": { + "External id": 455778,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450581111.879, "dur": 4.420, + "args": { + "External id": 455779,"Sequence number": 5036569, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15015 + } + }, + { + "ph": "s", "id": 21, "pid": 2338707, "tid": 2338707, "ts": 6267450581111.879, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450581114.803, "dur": 0.709, + "args": { + "External id": 455780,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267450581118.014, "dur": 8.023, + "args": { + "External id": 455781,"Sequence number": 5036570, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15017 + } + }, + { + "ph": "s", "id": 20, "pid": 2338707, "tid": 2338707, "ts": 6267450581118.014, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450581122.369, "dur": 2.694, + "args": { + "External id": 455782,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450581127.065, "dur": 4.366, + "args": { + "External id": 455783,"Sequence number": 5036571, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15019 + } + }, + { + "ph": "s", "id": 19, "pid": 2338707, "tid": 2338707, "ts": 6267450581127.065, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450581129.810, "dur": 0.686, + "args": { + "External id": 455784,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267450581135.363, "dur": 39.478, + "args": { + "External id": 455785,"Sequence number": 5036572, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267450581139.443, "dur": 35.147, + "args": { + "External id": 455786,"Sequence number": 5036572, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450581142.445, "dur": 7.885, + "args": { + "External id": 455787,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450581144.924, "dur": 4.862, + "args": { + "External id": 455788,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450581151.577, "dur": 22.472, + "args": { + "External id": 455789,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450581204.040, "dur": 3.884, + "args": { + "External id": 455790,"Sequence number": 5036572, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15026 + } + }, + { + "ph": "s", "id": 18, "pid": 2338707, "tid": 2338707, "ts": 6267450581204.040, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450581210.343, "dur": 0.909, + "args": { + "External id": 455791,"Sequence number": 5036573, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450581247.358, "dur": 45167.018, + "args": { + "External id": 455792,"Sequence number": 5036573, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15028 + } + }, + { + "ph": "s", "id": 17, "pid": 2338707, "tid": 2338707, "ts": 6267450581247.358, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267450581263.530, "dur": 28.022, + "args": { + "External id": 455793,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267450581264.293, "dur": 27.038, + "args": { + "External id": 455794,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450581265.557, "dur": 8.095, + "args": { + "External id": 455795,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450581267.246, "dur": 5.981, + "args": { + "External id": 455796,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450581274.501, "dur": 16.498, + "args": { + "External id": 455797,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450581308.788, "dur": 24.989, + "args": { + "External id": 455798,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450581309.942, "dur": 5.236, + "args": { + "External id": 455799,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450581311.678, "dur": 3.160, + "args": { + "External id": 455800,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450581316.481, "dur": 17.082, + "args": { + "External id": 455801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450581318.230, "dur": 14.927, + "args": { + "External id": 455802,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450581337.670, "dur": 23.582, + "args": { + "External id": 455803,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450581338.806, "dur": 7.413, + "args": { + "External id": 455804,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450581342.487, "dur": 3.503, + "args": { + "External id": 455805,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450581346.902, "dur": 14.139, + "args": { + "External id": 455806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450581347.694, "dur": 13.036, + "args": { + "External id": 455807,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267450581367.010, "dur": 19.535, + "args": { + "External id": 455808,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450581368.917, "dur": 5.018, + "args": { + "External id": 455809,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450581374.666, "dur": 11.605, + "args": { + "External id": 455810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450581375.390, "dur": 10.444, + "args": { + "External id": 455811,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2338707, + "ts": 6267450581393.850, "dur": 22.775, + "args": { + "External id": 455812,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450581419.348, "dur": 46.883, + "args": { + "External id": 455813,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450581421.401, "dur": 44.474, + "args": { + "External id": 455814,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450581427.670, "dur": 0.858, + "args": { + "External id": 455815,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450581430.117, "dur": 20.513, + "args": { + "External id": 455816,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450581432.010, "dur": 18.384, + "args": { + "External id": 455817,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450581434.916, "dur": 2.518, + "args": { + "External id": 455818,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450581438.378, "dur": 11.660, + "args": { + "External id": 455819,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2338707, + "ts": 6267450581469.681, "dur": 39080.770, + "args": { + "External id": 455820,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2338707, + "ts": 6267450581471.250, "dur": 39077.591, + "args": { + "External id": 455821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450620564.083, "dur": 9.148, + "args": { + "External id": 455822,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450620570.549, "dur": 0.978, + "args": { + "External id": 455823,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450620578.474, "dur": 120.273, + "args": { + "External id": 455824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450620579.891, "dur": 6.808, + "args": { + "External id": 455825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450620582.243, "dur": 3.392, + "args": { + "External id": 455826,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450620584.613, "dur": 0.754, + "args": { + "External id": 455827,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450620587.895, "dur": 110.119, + "args": { + "External id": 455828,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450620601.167, "dur": 95.838, + "args": { + "External id": 455829,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450620703.431, "dur": 5.418, + "args": { + "External id": 455830,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450620706.435, "dur": 0.916, + "args": { + "External id": 455831,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450620716.586, "dur": 2.501, + "args": { + "External id": 455832,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450620730.489, "dur": 9.916, + "args": { + "External id": 455833,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450620732.843, "dur": 7.273, + "args": { + "External id": 455834,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450620878.451, "dur": 253.120, + "args": { + "External id": 455835,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450620882.834, "dur": 1.750, + "args": { + "External id": 455836,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450620886.704, "dur": 244.207, + "args": { + "External id": 455837,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450620888.105, "dur": 0.508, + "args": { + "External id": 455838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450620890.467, "dur": 28.345, + "args": { + "External id": 455839,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450620920.784, "dur": 5.674, + "args": { + "External id": 455840,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450620925.411, "dur": 0.760, + "args": { + "External id": 455841,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450620927.544, "dur": 27.947, + "args": { + "External id": 455842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450620928.684, "dur": 1.062, + "args": { + "External id": 455843,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450620933.393, "dur": 21.750, + "args": { + "External id": 455844,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450620937.784, "dur": 3.492, + "args": { + "External id": 455845,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450620957.230, "dur": 25.955, + "args": { + "External id": 455846,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450620985.156, "dur": 14.682, + "args": { + "External id": 455847,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450621003.169, "dur": 15.208, + "args": { + "External id": 455848,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450621019.905, "dur": 13.303, + "args": { + "External id": 455849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450621035.223, "dur": 64.704, + "args": { + "External id": 455850,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450621037.940, "dur": 1.512, + "args": { + "External id": 455851,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450621043.849, "dur": 0.580, + "args": { + "External id": 455852,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450621102.907, "dur": 14.088, + "args": { + "External id": 455853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450621118.525, "dur": 11.279, + "args": { + "External id": 455854,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450621140.659, "dur": 2.558, + "args": { + "External id": 455855,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450621150.898, "dur": 4.507, + "args": { + "External id": 455856,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450621153.644, "dur": 0.409, + "args": { + "External id": 455857,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450621235.833, "dur": 74.098, + "args": { + "External id": 455858,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450621315.534, "dur": 8.502, + "args": { + "External id": 455859,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450621320.499, "dur": 0.729, + "args": { + "External id": 455860,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450621325.623, "dur": 27.766, + "args": { + "External id": 455861,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450621359.771, "dur": 7.382, + "args": { + "External id": 455862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450621362.280, "dur": 4.126, + "args": { + "External id": 455863,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450621364.669, "dur": 1.470, + "args": { + "External id": 455864,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450621370.673, "dur": 50.245, + "args": { + "External id": 455865,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450621374.286, "dur": 46.053, + "args": { + "External id": 455866,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450621434.803, "dur": 17.889, + "args": { + "External id": 455867,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450621459.383, "dur": 3.956, + "args": { + "External id": 455868,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450621461.474, "dur": 0.893, + "args": { + "External id": 455869,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450621467.875, "dur": 47.014, + "args": { + "External id": 455870,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450621469.111, "dur": 5.405, + "args": { + "External id": 455871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450621471.936, "dur": 1.975, + "args": { + "External id": 455872,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450621473.171, "dur": 0.609, + "args": { + "External id": 455873,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450621475.257, "dur": 39.196, + "args": { + "External id": 455874,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450621476.113, "dur": 37.652, + "args": { + "External id": 455875,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450621519.332, "dur": 5.519, + "args": { + "External id": 455876,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450621521.041, "dur": 2.579, + "args": { + "External id": 455877,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450621530.606, "dur": 1.444, + "args": { + "External id": 455878,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450621542.949, "dur": 6.196, + "args": { + "External id": 455879,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450621544.952, "dur": 3.941, + "args": { + "External id": 455880,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450621667.012, "dur": 190.348, + "args": { + "External id": 455881,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450621669.123, "dur": 2.908, + "args": { + "External id": 455882,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450621674.499, "dur": 182.081, + "args": { + "External id": 455883,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450621675.869, "dur": 0.350, + "args": { + "External id": 455884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450621679.696, "dur": 25.930, + "args": { + "External id": 455885,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450621707.365, "dur": 2.981, + "args": { + "External id": 455886,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450621709.419, "dur": 0.670, + "args": { + "External id": 455887,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450621711.300, "dur": 27.616, + "args": { + "External id": 455888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450621712.267, "dur": 3.228, + "args": { + "External id": 455889,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450621717.012, "dur": 21.601, + "args": { + "External id": 455890,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450621723.016, "dur": 2.711, + "args": { + "External id": 455891,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450621740.616, "dur": 20.784, + "args": { + "External id": 455892,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450621763.043, "dur": 13.869, + "args": { + "External id": 455893,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450621780.221, "dur": 12.939, + "args": { + "External id": 455894,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450621795.173, "dur": 11.553, + "args": { + "External id": 455895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450621810.698, "dur": 20.485, + "args": { + "External id": 455896,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450621812.944, "dur": 2.285, + "args": { + "External id": 455897,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450621817.184, "dur": 0.404, + "args": { + "External id": 455898,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450621832.793, "dur": 11.967, + "args": { + "External id": 455899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450621845.806, "dur": 9.794, + "args": { + "External id": 455900,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450621865.137, "dur": 2.162, + "args": { + "External id": 455901,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450621877.917, "dur": 3.561, + "args": { + "External id": 455902,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450621880.296, "dur": 0.278, + "args": { + "External id": 455903,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450621957.377, "dur": 57.346, + "args": { + "External id": 455904,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450622019.736, "dur": 5.908, + "args": { + "External id": 455905,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622022.102, "dur": 2.214, + "args": { + "External id": 455906,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450622027.090, "dur": 70.348, + "args": { + "External id": 455907,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450622105.400, "dur": 8.187, + "args": { + "External id": 455908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450622107.195, "dur": 5.542, + "args": { + "External id": 455909,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622111.547, "dur": 0.953, + "args": { + "External id": 455910,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450622117.037, "dur": 50.776, + "args": { + "External id": 455911,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450622118.523, "dur": 48.573, + "args": { + "External id": 455912,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450622172.388, "dur": 16.745, + "args": { + "External id": 455913,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450622196.376, "dur": 4.148, + "args": { + "External id": 455914,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622198.812, "dur": 0.789, + "args": { + "External id": 455915,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450622204.796, "dur": 47.908, + "args": { + "External id": 455916,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450622207.917, "dur": 3.742, + "args": { + "External id": 455917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450622209.153, "dur": 1.908, + "args": { + "External id": 455918,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622210.410, "dur": 0.540, + "args": { + "External id": 455919,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450622212.503, "dur": 39.824, + "args": { + "External id": 455920,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450622213.353, "dur": 38.327, + "args": { + "External id": 455921,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450622264.479, "dur": 3.755, + "args": { + "External id": 455922,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622266.356, "dur": 0.649, + "args": { + "External id": 455923,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450622277.146, "dur": 1.892, + "args": { + "External id": 455924,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450622287.655, "dur": 10.252, + "args": { + "External id": 455925,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450622289.699, "dur": 7.917, + "args": { + "External id": 455926,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450622392.047, "dur": 197.582, + "args": { + "External id": 455927,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450622393.903, "dur": 1.611, + "args": { + "External id": 455928,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450622397.387, "dur": 191.658, + "args": { + "External id": 455929,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450622400.939, "dur": 0.362, + "args": { + "External id": 455930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450622402.662, "dur": 23.761, + "args": { + "External id": 455931,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450622428.071, "dur": 3.072, + "args": { + "External id": 455932,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622430.139, "dur": 0.673, + "args": { + "External id": 455933,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450622432.054, "dur": 22.447, + "args": { + "External id": 455934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450622432.760, "dur": 1.521, + "args": { + "External id": 455935,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450622435.731, "dur": 18.398, + "args": { + "External id": 455936,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450622440.399, "dur": 2.617, + "args": { + "External id": 455937,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450622455.994, "dur": 26.339, + "args": { + "External id": 455938,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450622483.781, "dur": 14.511, + "args": { + "External id": 455939,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450622503.641, "dur": 15.820, + "args": { + "External id": 455940,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450622521.061, "dur": 13.502, + "args": { + "External id": 455941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450622536.468, "dur": 21.521, + "args": { + "External id": 455942,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450622538.431, "dur": 1.661, + "args": { + "External id": 455943,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622542.103, "dur": 0.611, + "args": { + "External id": 455944,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450622559.656, "dur": 12.938, + "args": { + "External id": 455945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450622576.025, "dur": 12.154, + "args": { + "External id": 455946,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450622613.841, "dur": 2.174, + "args": { + "External id": 455947,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450622626.067, "dur": 3.597, + "args": { + "External id": 455948,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622628.532, "dur": 0.388, + "args": { + "External id": 455949,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450622700.369, "dur": 63.930, + "args": { + "External id": 455950,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450622769.414, "dur": 4.691, + "args": { + "External id": 455951,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622772.075, "dur": 0.920, + "args": { + "External id": 455952,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450622775.917, "dur": 29.061, + "args": { + "External id": 455953,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450622811.547, "dur": 5.527, + "args": { + "External id": 455954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450622813.264, "dur": 3.033, + "args": { + "External id": 455955,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622815.005, "dur": 1.062, + "args": { + "External id": 455956,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450622819.849, "dur": 41.326, + "args": { + "External id": 455957,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450622820.774, "dur": 39.754, + "args": { + "External id": 455958,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450622865.349, "dur": 14.581, + "args": { + "External id": 455959,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450622885.713, "dur": 5.902, + "args": { + "External id": 455960,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622890.089, "dur": 0.634, + "args": { + "External id": 455961,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450622895.467, "dur": 47.513, + "args": { + "External id": 455962,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450622896.514, "dur": 5.694, + "args": { + "External id": 455963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450622897.159, "dur": 4.357, + "args": { + "External id": 455964,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622898.495, "dur": 2.799, + "args": { + "External id": 455965,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450622902.769, "dur": 39.767, + "args": { + "External id": 455966,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450622905.399, "dur": 36.500, + "args": { + "External id": 455967,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450622947.592, "dur": 3.747, + "args": { + "External id": 455968,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450622949.761, "dur": 0.502, + "args": { + "External id": 455969,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450622956.785, "dur": 1.449, + "args": { + "External id": 455970,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450622966.073, "dur": 6.767, + "args": { + "External id": 455971,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450622968.442, "dur": 4.135, + "args": { + "External id": 455972,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450623092.268, "dur": 187.051, + "args": { + "External id": 455973,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450623095.096, "dur": 5.135, + "args": { + "External id": 455974,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450623102.108, "dur": 176.762, + "args": { + "External id": 455975,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450623103.331, "dur": 0.532, + "args": { + "External id": 455976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450623106.835, "dur": 24.139, + "args": { + "External id": 455977,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450623132.469, "dur": 3.705, + "args": { + "External id": 455978,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450623134.768, "dur": 1.158, + "args": { + "External id": 455979,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450623137.324, "dur": 25.875, + "args": { + "External id": 455980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450623138.412, "dur": 2.437, + "args": { + "External id": 455981,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450623144.767, "dur": 18.031, + "args": { + "External id": 455982,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623148.222, "dur": 2.507, + "args": { + "External id": 455983,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450623164.774, "dur": 20.405, + "args": { + "External id": 455984,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623186.770, "dur": 13.398, + "args": { + "External id": 455985,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450623203.020, "dur": 12.827, + "args": { + "External id": 455986,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623217.600, "dur": 11.843, + "args": { + "External id": 455987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450623231.348, "dur": 19.147, + "args": { + "External id": 455988,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623233.137, "dur": 1.571, + "args": { + "External id": 455989,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450623237.273, "dur": 0.470, + "args": { + "External id": 455990,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623254.370, "dur": 11.949, + "args": { + "External id": 455991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623267.424, "dur": 10.313, + "args": { + "External id": 455992,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450623287.103, "dur": 2.144, + "args": { + "External id": 455993,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450623298.817, "dur": 4.905, + "args": { + "External id": 455994,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450623302.168, "dur": 0.655, + "args": { + "External id": 455995,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450623371.008, "dur": 56.663, + "args": { + "External id": 455996,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450623432.490, "dur": 5.590, + "args": { + "External id": 455997,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450623435.907, "dur": 0.866, + "args": { + "External id": 455998,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623442.214, "dur": 22.654, + "args": { + "External id": 455999,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450623469.633, "dur": 7.644, + "args": { + "External id": 456000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450623471.474, "dur": 4.817, + "args": { + "External id": 456001,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450623473.381, "dur": 2.757, + "args": { + "External id": 456002,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450623479.747, "dur": 41.694, + "args": { + "External id": 456003,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450623481.021, "dur": 39.738, + "args": { + "External id": 456004,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623525.452, "dur": 13.206, + "args": { + "External id": 456005,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450623546.583, "dur": 3.376, + "args": { + "External id": 456006,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450623548.792, "dur": 0.375, + "args": { + "External id": 456007,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450623554.481, "dur": 62.543, + "args": { + "External id": 456008,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450623555.472, "dur": 3.967, + "args": { + "External id": 456009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450623556.134, "dur": 2.728, + "args": { + "External id": 456010,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450623558.088, "dur": 0.642, + "args": { + "External id": 456011,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450623562.644, "dur": 53.849, + "args": { + "External id": 456012,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450623563.385, "dur": 52.205, + "args": { + "External id": 456013,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450623624.063, "dur": 4.514, + "args": { + "External id": 456014,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450623626.535, "dur": 0.660, + "args": { + "External id": 456015,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450623635.207, "dur": 1.666, + "args": { + "External id": 456016,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450623644.942, "dur": 11.535, + "args": { + "External id": 456017,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450623649.652, "dur": 6.537, + "args": { + "External id": 456018,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450623739.724, "dur": 174.829, + "args": { + "External id": 456019,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450623742.030, "dur": 1.662, + "args": { + "External id": 456020,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450623747.719, "dur": 166.406, + "args": { + "External id": 456021,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450623749.197, "dur": 0.424, + "args": { + "External id": 456022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450623751.064, "dur": 20.919, + "args": { + "External id": 456023,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450623773.534, "dur": 5.803, + "args": { + "External id": 456024,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450623776.117, "dur": 2.901, + "args": { + "External id": 456025,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450623780.367, "dur": 22.904, + "args": { + "External id": 456026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450623784.125, "dur": 1.068, + "args": { + "External id": 456027,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450623786.437, "dur": 16.488, + "args": { + "External id": 456028,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623789.265, "dur": 2.588, + "args": { + "External id": 456029,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450623804.625, "dur": 19.880, + "args": { + "External id": 456030,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623825.950, "dur": 11.830, + "args": { + "External id": 456031,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450623840.828, "dur": 13.069, + "args": { + "External id": 456032,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623855.201, "dur": 10.659, + "args": { + "External id": 456033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450623867.496, "dur": 22.125, + "args": { + "External id": 456034,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623869.158, "dur": 1.780, + "args": { + "External id": 456035,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450623874.625, "dur": 2.337, + "args": { + "External id": 456036,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623891.092, "dur": 10.820, + "args": { + "External id": 456037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450623902.825, "dur": 10.332, + "args": { + "External id": 456038,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450623921.118, "dur": 1.407, + "args": { + "External id": 456039,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450623930.088, "dur": 3.890, + "args": { + "External id": 456040,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450623932.764, "dur": 0.371, + "args": { + "External id": 456041,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450623994.169, "dur": 52.447, + "args": { + "External id": 456042,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450624099.190, "dur": 7.311, + "args": { + "External id": 456043,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624103.655, "dur": 1.089, + "args": { + "External id": 456044,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450624107.986, "dur": 27.586, + "args": { + "External id": 456045,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450624141.528, "dur": 6.627, + "args": { + "External id": 456046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450624143.510, "dur": 3.896, + "args": { + "External id": 456047,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624145.775, "dur": 1.428, + "args": { + "External id": 456048,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450624154.030, "dur": 46.834, + "args": { + "External id": 456049,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450624155.212, "dur": 45.120, + "args": { + "External id": 456050,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450624211.586, "dur": 22.230, + "args": { + "External id": 456051,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450624240.439, "dur": 3.488, + "args": { + "External id": 456052,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624242.429, "dur": 0.606, + "args": { + "External id": 456053,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450624247.866, "dur": 48.533, + "args": { + "External id": 456054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450624248.726, "dur": 6.260, + "args": { + "External id": 456055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450624251.968, "dur": 2.368, + "args": { + "External id": 456056,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624253.698, "dur": 0.512, + "args": { + "External id": 456057,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450624255.720, "dur": 40.273, + "args": { + "External id": 456058,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450624256.112, "dur": 39.343, + "args": { + "External id": 456059,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450624300.964, "dur": 3.249, + "args": { + "External id": 456060,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624302.694, "dur": 0.478, + "args": { + "External id": 456061,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450624310.317, "dur": 1.631, + "args": { + "External id": 456062,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450624321.807, "dur": 11.607, + "args": { + "External id": 456063,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450624323.917, "dur": 9.207, + "args": { + "External id": 456064,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450624426.353, "dur": 194.218, + "args": { + "External id": 456065,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450624428.789, "dur": 1.588, + "args": { + "External id": 456066,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450624432.397, "dur": 187.573, + "args": { + "External id": 456067,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450624434.139, "dur": 0.587, + "args": { + "External id": 456068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450624438.208, "dur": 21.585, + "args": { + "External id": 456069,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450624461.345, "dur": 4.156, + "args": { + "External id": 456070,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624464.408, "dur": 0.703, + "args": { + "External id": 456071,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450624466.534, "dur": 22.346, + "args": { + "External id": 456072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450624467.685, "dur": 1.461, + "args": { + "External id": 456073,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450624470.370, "dur": 18.131, + "args": { + "External id": 456074,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450624475.611, "dur": 2.209, + "args": { + "External id": 456075,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450624490.097, "dur": 20.231, + "args": { + "External id": 456076,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450624511.660, "dur": 13.338, + "args": { + "External id": 456077,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450624527.546, "dur": 12.398, + "args": { + "External id": 456078,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450624541.519, "dur": 11.425, + "args": { + "External id": 456079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450624556.929, "dur": 19.254, + "args": { + "External id": 456080,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450624559.234, "dur": 1.387, + "args": { + "External id": 456081,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624562.997, "dur": 0.476, + "args": { + "External id": 456082,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450624577.534, "dur": 12.119, + "args": { + "External id": 456083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450624590.648, "dur": 27.500, + "args": { + "External id": 456084,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450624629.393, "dur": 2.157, + "args": { + "External id": 456085,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450624640.996, "dur": 4.518, + "args": { + "External id": 456086,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624644.197, "dur": 0.472, + "args": { + "External id": 456087,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450624718.257, "dur": 58.110, + "args": { + "External id": 456088,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450624781.988, "dur": 4.995, + "args": { + "External id": 456089,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624784.830, "dur": 0.947, + "args": { + "External id": 456090,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450624788.246, "dur": 23.489, + "args": { + "External id": 456091,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450624816.216, "dur": 8.953, + "args": { + "External id": 456092,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450624818.046, "dur": 6.313, + "args": { + "External id": 456093,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624822.940, "dur": 1.268, + "args": { + "External id": 456094,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450624827.699, "dur": 41.301, + "args": { + "External id": 456095,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450624828.828, "dur": 39.689, + "args": { + "External id": 456096,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450624872.799, "dur": 13.623, + "args": { + "External id": 456097,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450624892.448, "dur": 3.763, + "args": { + "External id": 456098,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624894.550, "dur": 0.809, + "args": { + "External id": 456099,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450624899.872, "dur": 47.865, + "args": { + "External id": 456100,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450624903.308, "dur": 5.117, + "args": { + "External id": 456101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450624904.046, "dur": 3.765, + "args": { + "External id": 456102,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624905.676, "dur": 1.941, + "args": { + "External id": 456103,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450624909.299, "dur": 37.776, + "args": { + "External id": 456104,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450624909.660, "dur": 36.750, + "args": { + "External id": 456105,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450624951.827, "dur": 5.642, + "args": { + "External id": 456106,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450624954.015, "dur": 2.313, + "args": { + "External id": 456107,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450624965.995, "dur": 1.400, + "args": { + "External id": 456108,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450624974.835, "dur": 8.479, + "args": { + "External id": 456109,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450624979.611, "dur": 3.408, + "args": { + "External id": 456110,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450625104.401, "dur": 201.329, + "args": { + "External id": 456111,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450625108.888, "dur": 3.016, + "args": { + "External id": 456112,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450625113.817, "dur": 191.316, + "args": { + "External id": 456113,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450625117.879, "dur": 0.605, + "args": { + "External id": 456114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450625119.664, "dur": 23.526, + "args": { + "External id": 456115,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450625144.730, "dur": 6.105, + "args": { + "External id": 456116,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450625149.430, "dur": 1.118, + "args": { + "External id": 456117,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450625151.796, "dur": 21.872, + "args": { + "External id": 456118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450625152.928, "dur": 1.609, + "args": { + "External id": 456119,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450625155.794, "dur": 17.553, + "args": { + "External id": 456120,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625159.275, "dur": 2.729, + "args": { + "External id": 456121,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450625175.117, "dur": 23.395, + "args": { + "External id": 456122,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625199.977, "dur": 13.153, + "args": { + "External id": 456123,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450625218.146, "dur": 16.160, + "args": { + "External id": 456124,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625235.944, "dur": 12.899, + "args": { + "External id": 456125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450625250.370, "dur": 23.952, + "args": { + "External id": 456126,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625255.056, "dur": 1.583, + "args": { + "External id": 456127,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450625258.885, "dur": 0.657, + "args": { + "External id": 456128,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625275.690, "dur": 13.125, + "args": { + "External id": 456129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625292.312, "dur": 11.858, + "args": { + "External id": 456130,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450625313.930, "dur": 2.243, + "args": { + "External id": 456131,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450625325.496, "dur": 4.488, + "args": { + "External id": 456132,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450625328.476, "dur": 0.600, + "args": { + "External id": 456133,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450625399.697, "dur": 60.907, + "args": { + "External id": 456134,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450625465.969, "dur": 5.057, + "args": { + "External id": 456135,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450625469.412, "dur": 0.706, + "args": { + "External id": 456136,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625472.717, "dur": 25.448, + "args": { + "External id": 456137,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450625504.908, "dur": 6.278, + "args": { + "External id": 456138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450625506.917, "dur": 3.572, + "args": { + "External id": 456139,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450625508.814, "dur": 1.471, + "args": { + "External id": 456140,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450625513.824, "dur": 41.660, + "args": { + "External id": 456141,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450625515.001, "dur": 39.900, + "args": { + "External id": 456142,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625559.129, "dur": 15.417, + "args": { + "External id": 456143,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450625580.476, "dur": 6.374, + "args": { + "External id": 456144,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450625585.583, "dur": 0.488, + "args": { + "External id": 456145,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450625590.684, "dur": 70.923, + "args": { + "External id": 456146,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450625609.723, "dur": 5.792, + "args": { + "External id": 456147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450625611.475, "dur": 3.181, + "args": { + "External id": 456148,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450625613.176, "dur": 1.165, + "args": { + "External id": 456149,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450625616.213, "dur": 44.872, + "args": { + "External id": 456150,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450625619.275, "dur": 40.987, + "args": { + "External id": 456151,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450625666.619, "dur": 4.049, + "args": { + "External id": 456152,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450625669.010, "dur": 0.530, + "args": { + "External id": 456153,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450625676.892, "dur": 1.716, + "args": { + "External id": 456154,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450625686.529, "dur": 8.364, + "args": { + "External id": 456155,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450625688.440, "dur": 6.131, + "args": { + "External id": 456156,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450625780.273, "dur": 178.187, + "args": { + "External id": 456157,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450625782.288, "dur": 1.716, + "args": { + "External id": 456158,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450625785.899, "dur": 171.959, + "args": { + "External id": 456159,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450625787.737, "dur": 0.371, + "args": { + "External id": 456160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450625789.137, "dur": 25.234, + "args": { + "External id": 456161,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450625816.119, "dur": 3.767, + "args": { + "External id": 456162,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450625818.899, "dur": 0.779, + "args": { + "External id": 456163,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450625820.754, "dur": 25.362, + "args": { + "External id": 456164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450625822.149, "dur": 1.155, + "args": { + "External id": 456165,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450625826.687, "dur": 19.033, + "args": { + "External id": 456166,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625831.784, "dur": 1.994, + "args": { + "External id": 456167,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450625847.629, "dur": 21.697, + "args": { + "External id": 456168,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625871.025, "dur": 13.220, + "args": { + "External id": 456169,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450625886.634, "dur": 12.624, + "args": { + "External id": 456170,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625900.665, "dur": 10.877, + "args": { + "External id": 456171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450625913.195, "dur": 18.242, + "args": { + "External id": 456172,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625915.255, "dur": 1.642, + "args": { + "External id": 456173,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450625918.649, "dur": 0.591, + "args": { + "External id": 456174,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625934.985, "dur": 10.200, + "args": { + "External id": 456175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450625946.117, "dur": 10.855, + "args": { + "External id": 456176,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450625964.515, "dur": 1.479, + "args": { + "External id": 456177,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450625974.783, "dur": 3.955, + "args": { + "External id": 456178,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450625977.377, "dur": 0.545, + "args": { + "External id": 456179,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450626041.093, "dur": 102.674, + "args": { + "External id": 456180,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450626151.389, "dur": 7.289, + "args": { + "External id": 456181,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626155.498, "dur": 1.502, + "args": { + "External id": 456182,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626162.571, "dur": 26.566, + "args": { + "External id": 456183,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450626194.636, "dur": 5.235, + "args": { + "External id": 456184,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450626196.520, "dur": 2.665, + "args": { + "External id": 456185,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626198.146, "dur": 0.830, + "args": { + "External id": 456186,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450626202.738, "dur": 46.237, + "args": { + "External id": 456187,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450626203.749, "dur": 44.555, + "args": { + "External id": 456188,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626252.780, "dur": 16.288, + "args": { + "External id": 456189,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450626276.563, "dur": 28.453, + "args": { + "External id": 456190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450626279.438, "dur": 25.028, + "args": { + "External id": 456191,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626286.662, "dur": 0.529, + "args": { + "External id": 456192,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450626337.591, "dur": 27.375, + "args": { + "External id": 456193,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450626339.518, "dur": 25.184, + "args": { + "External id": 456194,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 15430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626344.612, "dur": 3.758, + "args": { + "External id": 456195,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626349.369, "dur": 14.900, + "args": { + "External id": 456196,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450626379.271, "dur": 7.427, + "args": { + "External id": 456197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450626383.518, "dur": 2.867, + "args": { + "External id": 456198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450626387.738, "dur": 0.944, + "args": { + "External id": 456199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450626388.060, "dur": 0.535, + "args": { + "External id": 456200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626432.252, "dur": 26.377, + "args": { + "External id": 456201,"Sequence number": 5036574, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626460.514, "dur": 16.669, + "args": { + "External id": 456202,"Sequence number": 5036575, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15438 + } + }, + { + "ph": "s", "id": 16, "pid": 2338707, "tid": 2338707, "ts": 6267450626460.514, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450626483.211, "dur": 7.067, + "args": { + "External id": 456203,"Sequence number": 5036576, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626487.599, "dur": 1.083, + "args": { + "External id": 456204,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267450626492.906, "dur": 8.420, + "args": { + "External id": 456205,"Sequence number": 5036576, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626499.527, "dur": 0.395, + "args": { + "External id": 456206,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450626502.459, "dur": 2.093, + "args": { + "External id": 456207,"Sequence number": 5036576, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626503.691, "dur": 0.271, + "args": { + "External id": 456208,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450626508.423, "dur": 5.172, + "args": { + "External id": 456209,"Sequence number": 5036576, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15445 + } + }, + { + "ph": "s", "id": 15, "pid": 2338707, "tid": 2338707, "ts": 6267450626508.423, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626511.440, "dur": 0.759, + "args": { + "External id": 456210,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450626516.538, "dur": 6.235, + "args": { + "External id": 456211,"Sequence number": 5036577, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15447 + } + }, + { + "ph": "s", "id": 14, "pid": 2338707, "tid": 2338707, "ts": 6267450626516.538, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626521.506, "dur": 0.460, + "args": { + "External id": 456212,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267450626523.777, "dur": 5.395, + "args": { + "External id": 456213,"Sequence number": 5036578, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15449 + } + }, + { + "ph": "s", "id": 13, "pid": 2338707, "tid": 2338707, "ts": 6267450626523.777, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626527.759, "dur": 0.557, + "args": { + "External id": 456214,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450626530.151, "dur": 6.078, + "args": { + "External id": 456215,"Sequence number": 5036579, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15451 + } + }, + { + "ph": "s", "id": 12, "pid": 2338707, "tid": 2338707, "ts": 6267450626530.151, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626532.735, "dur": 2.780, + "args": { + "External id": 456216,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267450626540.075, "dur": 33.404, + "args": { + "External id": 456217,"Sequence number": 5036580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267450626543.788, "dur": 29.468, + "args": { + "External id": 456218,"Sequence number": 5036580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450626546.558, "dur": 6.852, + "args": { + "External id": 456219,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450626548.871, "dur": 3.928, + "args": { + "External id": 456220,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626554.243, "dur": 18.541, + "args": { + "External id": 456221,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450626620.086, "dur": 5.096, + "args": { + "External id": 456222,"Sequence number": 5036580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15458 + } + }, + { + "ph": "s", "id": 11, "pid": 2338707, "tid": 2338707, "ts": 6267450626620.086, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450626627.869, "dur": 0.985, + "args": { + "External id": 456223,"Sequence number": 5036581, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450626667.110, "dur": 46148.260, + "args": { + "External id": 456224,"Sequence number": 5036581, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15460 + } + }, + { + "ph": "s", "id": 10, "pid": 2338707, "tid": 2338707, "ts": 6267450626667.110, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267450626684.188, "dur": 32.041, + "args": { + "External id": 456225,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267450626684.816, "dur": 31.117, + "args": { + "External id": 456226,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450626686.466, "dur": 5.306, + "args": { + "External id": 456227,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450626687.665, "dur": 3.768, + "args": { + "External id": 456228,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626692.780, "dur": 22.667, + "args": { + "External id": 456229,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450626734.519, "dur": 29.090, + "args": { + "External id": 456230,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450626735.606, "dur": 6.003, + "args": { + "External id": 456231,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626737.890, "dur": 3.465, + "args": { + "External id": 456232,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626742.920, "dur": 20.462, + "args": { + "External id": 456233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626746.940, "dur": 15.946, + "args": { + "External id": 456234,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450626767.656, "dur": 20.662, + "args": { + "External id": 456235,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450626768.775, "dur": 4.438, + "args": { + "External id": 456236,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626770.140, "dur": 2.793, + "args": { + "External id": 456237,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626773.693, "dur": 14.429, + "args": { + "External id": 456238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626774.158, "dur": 13.560, + "args": { + "External id": 456239,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267450626794.648, "dur": 21.678, + "args": { + "External id": 456240,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450626796.385, "dur": 2.675, + "args": { + "External id": 456241,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626799.700, "dur": 16.284, + "args": { + "External id": 456242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626804.376, "dur": 11.298, + "args": { + "External id": 456243,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2338707, + "ts": 6267450626820.824, "dur": 24.592, + "args": { + "External id": 456244,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450626848.080, "dur": 53.038, + "args": { + "External id": 456245,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450626850.551, "dur": 50.045, + "args": { + "External id": 456246,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626856.064, "dur": 0.798, + "args": { + "External id": 456247,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450626858.280, "dur": 26.673, + "args": { + "External id": 456248,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450626860.176, "dur": 24.570, + "args": { + "External id": 456249,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450626863.089, "dur": 4.811, + "args": { + "External id": 456250,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450626868.581, "dur": 15.588, + "args": { + "External id": 456251,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2338707, + "ts": 6267450626905.081, "dur": 40093.834, + "args": { + "External id": 456252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2338707, + "ts": 6267450626906.856, "dur": 40091.205, + "args": { + "External id": 456253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450667013.435, "dur": 6.914, + "args": { + "External id": 456254,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450667017.392, "dur": 1.307, + "args": { + "External id": 456255,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450667025.995, "dur": 137.127, + "args": { + "External id": 456256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450667027.668, "dur": 6.542, + "args": { + "External id": 456257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450667030.311, "dur": 3.016, + "args": { + "External id": 456258,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450667032.322, "dur": 0.743, + "args": { + "External id": 456259,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450667038.221, "dur": 124.197, + "args": { + "External id": 456260,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450667039.840, "dur": 121.162, + "args": { + "External id": 456261,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450667168.928, "dur": 5.650, + "args": { + "External id": 456262,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450667171.955, "dur": 1.000, + "args": { + "External id": 456263,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450667182.859, "dur": 2.380, + "args": { + "External id": 456264,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450667197.050, "dur": 9.978, + "args": { + "External id": 456265,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450667201.821, "dur": 4.861, + "args": { + "External id": 456266,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450667348.701, "dur": 216.756, + "args": { + "External id": 456267,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450667352.265, "dur": 2.175, + "args": { + "External id": 456268,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450667356.339, "dur": 208.612, + "args": { + "External id": 456269,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450667358.129, "dur": 0.500, + "args": { + "External id": 456270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450667360.578, "dur": 31.151, + "args": { + "External id": 456271,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450667393.885, "dur": 3.697, + "args": { + "External id": 456272,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450667396.585, "dur": 0.651, + "args": { + "External id": 456273,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450667399.046, "dur": 30.812, + "args": { + "External id": 456274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450667402.608, "dur": 1.142, + "args": { + "External id": 456275,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450667405.473, "dur": 24.064, + "args": { + "External id": 456276,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450667411.590, "dur": 3.292, + "args": { + "External id": 456277,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450667431.798, "dur": 22.829, + "args": { + "External id": 456278,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450667457.029, "dur": 17.129, + "args": { + "External id": 456279,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450667477.584, "dur": 16.033, + "args": { + "External id": 456280,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450667495.245, "dur": 15.275, + "args": { + "External id": 456281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450667512.291, "dur": 24.121, + "args": { + "External id": 456282,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450667514.680, "dur": 1.516, + "args": { + "External id": 456283,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450667520.562, "dur": 0.732, + "args": { + "External id": 456284,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450667538.013, "dur": 14.030, + "args": { + "External id": 456285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450667553.354, "dur": 10.519, + "args": { + "External id": 456286,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450667573.576, "dur": 1.648, + "args": { + "External id": 456287,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450667582.217, "dur": 3.699, + "args": { + "External id": 456288,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450667584.318, "dur": 0.624, + "args": { + "External id": 456289,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450667682.603, "dur": 77.124, + "args": { + "External id": 456290,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450667768.113, "dur": 7.407, + "args": { + "External id": 456291,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450667771.270, "dur": 1.134, + "args": { + "External id": 456292,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450667777.300, "dur": 33.316, + "args": { + "External id": 456293,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450667817.160, "dur": 7.248, + "args": { + "External id": 456294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450667819.362, "dur": 4.249, + "args": { + "External id": 456295,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450667822.208, "dur": 1.176, + "args": { + "External id": 456296,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450667830.158, "dur": 45.985, + "args": { + "External id": 456297,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450667831.484, "dur": 43.985, + "args": { + "External id": 456298,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450667880.626, "dur": 15.310, + "args": { + "External id": 456299,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450667902.786, "dur": 3.816, + "args": { + "External id": 456300,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450667905.071, "dur": 0.540, + "args": { + "External id": 456301,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450667911.354, "dur": 52.759, + "args": { + "External id": 456302,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450667912.482, "dur": 8.469, + "args": { + "External id": 456303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450667915.984, "dur": 4.347, + "args": { + "External id": 456304,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450667919.594, "dur": 0.590, + "args": { + "External id": 456305,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450667922.071, "dur": 41.536, + "args": { + "External id": 456306,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450667922.829, "dur": 40.051, + "args": { + "External id": 456307,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450667968.166, "dur": 4.123, + "args": { + "External id": 456308,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450667970.187, "dur": 0.650, + "args": { + "External id": 456309,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450667980.911, "dur": 1.572, + "args": { + "External id": 456310,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450667991.291, "dur": 6.300, + "args": { + "External id": 456311,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450667993.460, "dur": 3.871, + "args": { + "External id": 456312,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450668137.484, "dur": 192.962, + "args": { + "External id": 456313,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450668140.130, "dur": 3.342, + "args": { + "External id": 456314,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450668145.654, "dur": 184.188, + "args": { + "External id": 456315,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450668151.820, "dur": 0.390, + "args": { + "External id": 456316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450668153.557, "dur": 22.915, + "args": { + "External id": 456317,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450668178.418, "dur": 4.158, + "args": { + "External id": 456318,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450668181.018, "dur": 1.186, + "args": { + "External id": 456319,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450668183.828, "dur": 23.320, + "args": { + "External id": 456320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450668187.539, "dur": 0.924, + "args": { + "External id": 456321,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450668189.957, "dur": 16.873, + "args": { + "External id": 456322,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668192.737, "dur": 2.384, + "args": { + "External id": 456323,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450668208.660, "dur": 24.781, + "args": { + "External id": 456324,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668235.358, "dur": 12.548, + "args": { + "External id": 456325,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450668253.293, "dur": 14.037, + "args": { + "External id": 456326,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668268.960, "dur": 11.705, + "args": { + "External id": 456327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450668282.807, "dur": 21.198, + "args": { + "External id": 456328,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668285.285, "dur": 2.259, + "args": { + "External id": 456329,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450668289.340, "dur": 0.656, + "args": { + "External id": 456330,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668305.345, "dur": 11.560, + "args": { + "External id": 456331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668318.089, "dur": 10.359, + "args": { + "External id": 456332,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450668340.951, "dur": 2.140, + "args": { + "External id": 456333,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450668354.602, "dur": 4.154, + "args": { + "External id": 456334,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450668357.259, "dur": 0.644, + "args": { + "External id": 456335,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450668431.171, "dur": 60.760, + "args": { + "External id": 456336,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450668497.802, "dur": 7.297, + "args": { + "External id": 456337,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450668501.186, "dur": 2.730, + "args": { + "External id": 456338,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668506.521, "dur": 26.568, + "args": { + "External id": 456339,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450668537.769, "dur": 7.708, + "args": { + "External id": 456340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450668539.558, "dur": 5.168, + "args": { + "External id": 456341,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450668543.605, "dur": 0.917, + "args": { + "External id": 456342,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450668548.460, "dur": 42.480, + "args": { + "External id": 456343,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450668549.935, "dur": 40.484, + "args": { + "External id": 456344,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668612.494, "dur": 17.041, + "args": { + "External id": 456345,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450668639.983, "dur": 4.313, + "args": { + "External id": 456346,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450668642.365, "dur": 0.800, + "args": { + "External id": 456347,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450668651.179, "dur": 49.574, + "args": { + "External id": 456348,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450668652.345, "dur": 4.158, + "args": { + "External id": 456349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450668653.686, "dur": 2.210, + "args": { + "External id": 456350,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450668654.905, "dur": 0.872, + "args": { + "External id": 456351,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450668657.315, "dur": 43.001, + "args": { + "External id": 456352,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450668658.160, "dur": 41.345, + "args": { + "External id": 456353,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450668705.214, "dur": 4.024, + "args": { + "External id": 456354,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450668707.597, "dur": 0.539, + "args": { + "External id": 456355,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450668718.395, "dur": 1.854, + "args": { + "External id": 456356,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450668729.233, "dur": 9.116, + "args": { + "External id": 456357,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450668731.111, "dur": 6.945, + "args": { + "External id": 456358,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450668831.581, "dur": 184.690, + "args": { + "External id": 456359,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450668833.589, "dur": 1.678, + "args": { + "External id": 456360,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450668837.271, "dur": 178.471, + "args": { + "External id": 456361,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450668840.875, "dur": 0.345, + "args": { + "External id": 456362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450668842.187, "dur": 22.111, + "args": { + "External id": 456363,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450668865.971, "dur": 3.273, + "args": { + "External id": 456364,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450668868.360, "dur": 0.687, + "args": { + "External id": 456365,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450668870.231, "dur": 21.673, + "args": { + "External id": 456366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450668871.632, "dur": 1.117, + "args": { + "External id": 456367,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450668873.950, "dur": 17.623, + "args": { + "External id": 456368,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668878.174, "dur": 2.451, + "args": { + "External id": 456369,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450668893.463, "dur": 18.909, + "args": { + "External id": 456370,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668913.985, "dur": 12.964, + "args": { + "External id": 456371,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450668932.481, "dur": 14.959, + "args": { + "External id": 456372,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668949.023, "dur": 13.939, + "args": { + "External id": 456373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450668964.925, "dur": 20.542, + "args": { + "External id": 456374,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668967.101, "dur": 1.658, + "args": { + "External id": 456375,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450668970.444, "dur": 0.609, + "args": { + "External id": 456376,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450668986.857, "dur": 13.629, + "args": { + "External id": 456377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450669003.754, "dur": 10.758, + "args": { + "External id": 456378,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450669023.161, "dur": 1.704, + "args": { + "External id": 456379,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450669032.900, "dur": 3.049, + "args": { + "External id": 456380,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450669034.773, "dur": 0.390, + "args": { + "External id": 456381,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450669145.668, "dur": 80.108, + "args": { + "External id": 456382,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450669232.250, "dur": 6.771, + "args": { + "External id": 456383,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450669236.021, "dur": 1.570, + "args": { + "External id": 456384,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450669240.661, "dur": 30.814, + "args": { + "External id": 456385,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450669279.020, "dur": 5.489, + "args": { + "External id": 456386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450669280.838, "dur": 2.945, + "args": { + "External id": 456387,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450669282.742, "dur": 0.810, + "args": { + "External id": 456388,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450669287.323, "dur": 43.283, + "args": { + "External id": 456389,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450669289.067, "dur": 40.744, + "args": { + "External id": 456390,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450669334.603, "dur": 15.439, + "args": { + "External id": 456391,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450669356.332, "dur": 5.628, + "args": { + "External id": 456392,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450669360.541, "dur": 0.420, + "args": { + "External id": 456393,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450669366.896, "dur": 51.705, + "args": { + "External id": 456394,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450669367.951, "dur": 5.256, + "args": { + "External id": 456395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450669368.801, "dur": 3.710, + "args": { + "External id": 456396,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450669369.789, "dur": 2.502, + "args": { + "External id": 456397,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450669374.128, "dur": 44.013, + "args": { + "External id": 456398,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450669379.768, "dur": 37.740, + "args": { + "External id": 456399,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450669422.860, "dur": 3.401, + "args": { + "External id": 456400,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450669424.838, "dur": 0.352, + "args": { + "External id": 456401,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450669432.074, "dur": 1.615, + "args": { + "External id": 456402,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450669441.911, "dur": 10.026, + "args": { + "External id": 456403,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450669445.865, "dur": 5.811, + "args": { + "External id": 456404,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450669541.574, "dur": 215.871, + "args": { + "External id": 456405,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450669545.960, "dur": 1.556, + "args": { + "External id": 456406,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450669548.992, "dur": 208.026, + "args": { + "External id": 456407,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450669550.402, "dur": 0.410, + "args": { + "External id": 456408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450669551.837, "dur": 22.166, + "args": { + "External id": 456409,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450669575.980, "dur": 4.930, + "args": { + "External id": 456410,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450669580.014, "dur": 0.660, + "args": { + "External id": 456411,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450669582.055, "dur": 43.812, + "args": { + "External id": 456412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450669585.256, "dur": 0.896, + "args": { + "External id": 456413,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450669587.728, "dur": 37.655, + "args": { + "External id": 456414,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450669590.289, "dur": 17.978, + "args": { + "External id": 456415,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450669627.891, "dur": 22.933, + "args": { + "External id": 456416,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450669652.475, "dur": 15.801, + "args": { + "External id": 456417,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450669671.401, "dur": 15.895, + "args": { + "External id": 456418,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450669689.015, "dur": 12.900, + "args": { + "External id": 456419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450669703.630, "dur": 24.446, + "args": { + "External id": 456420,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450669706.103, "dur": 1.700, + "args": { + "External id": 456421,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450669711.277, "dur": 0.629, + "args": { + "External id": 456422,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450669729.376, "dur": 12.955, + "args": { + "External id": 456423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450669743.495, "dur": 12.299, + "args": { + "External id": 456424,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450669765.030, "dur": 2.201, + "args": { + "External id": 456425,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450669776.755, "dur": 3.551, + "args": { + "External id": 456426,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450669778.973, "dur": 0.473, + "args": { + "External id": 456427,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450669847.724, "dur": 55.752, + "args": { + "External id": 456428,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450669908.450, "dur": 6.936, + "args": { + "External id": 456429,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450669913.371, "dur": 0.810, + "args": { + "External id": 456430,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450669916.951, "dur": 23.715, + "args": { + "External id": 456431,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450669945.020, "dur": 5.776, + "args": { + "External id": 456432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450669946.773, "dur": 3.179, + "args": { + "External id": 456433,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450669948.566, "dur": 1.190, + "args": { + "External id": 456434,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450669953.493, "dur": 40.670, + "args": { + "External id": 456435,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450669954.510, "dur": 38.806, + "args": { + "External id": 456436,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450669998.030, "dur": 13.297, + "args": { + "External id": 456437,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450670019.423, "dur": 3.422, + "args": { + "External id": 456438,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450670021.357, "dur": 0.654, + "args": { + "External id": 456439,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450670026.922, "dur": 82.766, + "args": { + "External id": 456440,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450670027.833, "dur": 5.098, + "args": { + "External id": 456441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450670028.555, "dur": 3.827, + "args": { + "External id": 456442,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450670031.870, "dur": 0.395, + "args": { + "External id": 456443,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450670033.492, "dur": 75.425, + "args": { + "External id": 456444,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450670034.249, "dur": 73.498, + "args": { + "External id": 456445,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450670117.593, "dur": 5.024, + "args": { + "External id": 456446,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450670120.650, "dur": 0.648, + "args": { + "External id": 456447,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450670129.043, "dur": 1.538, + "args": { + "External id": 456448,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450670139.510, "dur": 9.867, + "args": { + "External id": 456449,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450670143.807, "dur": 5.274, + "args": { + "External id": 456450,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450670239.148, "dur": 177.189, + "args": { + "External id": 456451,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450670241.311, "dur": 1.783, + "args": { + "External id": 456452,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450670244.837, "dur": 171.027, + "args": { + "External id": 456453,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450670246.944, "dur": 0.451, + "args": { + "External id": 456454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450670248.414, "dur": 23.905, + "args": { + "External id": 456455,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450670273.957, "dur": 3.413, + "args": { + "External id": 456456,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450670276.231, "dur": 0.699, + "args": { + "External id": 456457,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450670278.428, "dur": 24.208, + "args": { + "External id": 456458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450670281.764, "dur": 1.185, + "args": { + "External id": 456459,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450670284.092, "dur": 18.258, + "args": { + "External id": 456460,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450670288.502, "dur": 2.959, + "args": { + "External id": 456461,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450670304.091, "dur": 20.946, + "args": { + "External id": 456462,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450670326.499, "dur": 12.739, + "args": { + "External id": 456463,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450670341.770, "dur": 14.377, + "args": { + "External id": 456464,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450670357.366, "dur": 11.537, + "args": { + "External id": 456465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450670370.572, "dur": 20.226, + "args": { + "External id": 456466,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450670372.766, "dur": 1.617, + "args": { + "External id": 456467,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450670378.638, "dur": 0.619, + "args": { + "External id": 456468,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450670392.141, "dur": 11.287, + "args": { + "External id": 456469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450670404.611, "dur": 10.161, + "args": { + "External id": 456470,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450670422.932, "dur": 1.745, + "args": { + "External id": 456471,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450670434.158, "dur": 3.528, + "args": { + "External id": 456472,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450670436.520, "dur": 0.424, + "args": { + "External id": 456473,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450670507.279, "dur": 61.493, + "args": { + "External id": 456474,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450670575.931, "dur": 5.279, + "args": { + "External id": 456475,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450670579.101, "dur": 0.893, + "args": { + "External id": 456476,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450670582.794, "dur": 45.423, + "args": { + "External id": 456477,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450670635.525, "dur": 6.249, + "args": { + "External id": 456478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450670637.387, "dur": 3.580, + "args": { + "External id": 456479,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450670639.538, "dur": 1.203, + "args": { + "External id": 456480,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450670647.812, "dur": 49.364, + "args": { + "External id": 456481,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450670649.383, "dur": 46.932, + "args": { + "External id": 456482,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450670701.229, "dur": 19.524, + "args": { + "External id": 456483,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450670727.522, "dur": 4.039, + "args": { + "External id": 456484,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450670730.065, "dur": 0.495, + "args": { + "External id": 456485,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450670736.363, "dur": 50.701, + "args": { + "External id": 456486,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450670737.381, "dur": 7.666, + "args": { + "External id": 456487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450670740.989, "dur": 3.414, + "args": { + "External id": 456488,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450670742.290, "dur": 1.904, + "args": { + "External id": 456489,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450670745.634, "dur": 41.019, + "args": { + "External id": 456490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450670746.379, "dur": 39.638, + "args": { + "External id": 456491,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450670791.219, "dur": 3.762, + "args": { + "External id": 456492,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450670793.192, "dur": 0.429, + "args": { + "External id": 456493,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450670803.512, "dur": 1.395, + "args": { + "External id": 456494,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450670813.572, "dur": 8.011, + "args": { + "External id": 456495,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450670815.503, "dur": 5.782, + "args": { + "External id": 456496,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450670912.334, "dur": 208.841, + "args": { + "External id": 456497,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450670914.492, "dur": 1.449, + "args": { + "External id": 456498,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450670917.482, "dur": 203.111, + "args": { + "External id": 456499,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450670921.267, "dur": 0.472, + "args": { + "External id": 456500,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450670924.447, "dur": 19.381, + "args": { + "External id": 456501,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450670945.486, "dur": 2.580, + "args": { + "External id": 456502,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450670947.242, "dur": 0.595, + "args": { + "External id": 456503,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450670948.770, "dur": 21.822, + "args": { + "External id": 456504,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450670949.787, "dur": 2.946, + "args": { + "External id": 456505,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450670953.946, "dur": 16.337, + "args": { + "External id": 456506,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450670956.589, "dur": 2.448, + "args": { + "External id": 456507,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450670972.104, "dur": 18.822, + "args": { + "External id": 456508,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450670992.413, "dur": 12.464, + "args": { + "External id": 456509,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450671009.588, "dur": 12.535, + "args": { + "External id": 456510,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671023.952, "dur": 10.776, + "args": { + "External id": 456511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450671036.601, "dur": 54.705, + "args": { + "External id": 456512,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671039.118, "dur": 1.397, + "args": { + "External id": 456513,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450671042.555, "dur": 0.481, + "args": { + "External id": 456514,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671093.824, "dur": 13.492, + "args": { + "External id": 456515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671108.493, "dur": 10.543, + "args": { + "External id": 456516,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450671131.523, "dur": 2.458, + "args": { + "External id": 456517,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450671144.693, "dur": 3.767, + "args": { + "External id": 456518,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450671146.966, "dur": 0.613, + "args": { + "External id": 456519,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450671218.217, "dur": 59.289, + "args": { + "External id": 456520,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450671282.672, "dur": 4.752, + "args": { + "External id": 456521,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450671285.470, "dur": 0.710, + "args": { + "External id": 456522,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671289.114, "dur": 26.620, + "args": { + "External id": 456523,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450671320.792, "dur": 9.827, + "args": { + "External id": 456524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450671322.561, "dur": 7.296, + "args": { + "External id": 456525,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450671326.607, "dur": 3.017, + "args": { + "External id": 456526,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450671333.833, "dur": 41.581, + "args": { + "External id": 456527,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450671334.966, "dur": 39.892, + "args": { + "External id": 456528,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671379.587, "dur": 15.987, + "args": { + "External id": 456529,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450671401.199, "dur": 3.381, + "args": { + "External id": 456530,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450671403.252, "dur": 0.503, + "args": { + "External id": 456531,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450671410.977, "dur": 44.568, + "args": { + "External id": 456532,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450671411.900, "dur": 3.512, + "args": { + "External id": 456533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450671412.701, "dur": 2.165, + "args": { + "External id": 456534,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450671414.000, "dur": 0.641, + "args": { + "External id": 456535,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450671416.099, "dur": 39.040, + "args": { + "External id": 456536,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450671416.602, "dur": 38.020, + "args": { + "External id": 456537,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450671459.912, "dur": 6.142, + "args": { + "External id": 456538,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450671461.788, "dur": 3.096, + "args": { + "External id": 456539,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450671474.157, "dur": 1.512, + "args": { + "External id": 456540,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450671483.849, "dur": 5.852, + "args": { + "External id": 456541,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450671485.414, "dur": 4.011, + "args": { + "External id": 456542,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450671573.374, "dur": 206.978, + "args": { + "External id": 456543,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450671574.898, "dur": 1.836, + "args": { + "External id": 456544,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450671580.182, "dur": 199.528, + "args": { + "External id": 456545,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450671584.070, "dur": 0.356, + "args": { + "External id": 456546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450671585.472, "dur": 38.441, + "args": { + "External id": 456547,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450671626.433, "dur": 4.832, + "args": { + "External id": 456548,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450671628.399, "dur": 2.580, + "args": { + "External id": 456549,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450671632.871, "dur": 24.276, + "args": { + "External id": 456550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450671633.918, "dur": 1.364, + "args": { + "External id": 456551,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450671636.939, "dur": 19.947, + "args": { + "External id": 456552,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671640.186, "dur": 3.534, + "args": { + "External id": 456553,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450671658.624, "dur": 22.751, + "args": { + "External id": 456554,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671682.815, "dur": 15.186, + "args": { + "External id": 456555,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450671704.475, "dur": 13.432, + "args": { + "External id": 456556,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671719.221, "dur": 10.882, + "args": { + "External id": 456557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450671732.051, "dur": 20.889, + "args": { + "External id": 456558,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671734.015, "dur": 1.717, + "args": { + "External id": 456559,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450671737.754, "dur": 2.265, + "args": { + "External id": 456560,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671754.414, "dur": 10.885, + "args": { + "External id": 456561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671768.668, "dur": 10.002, + "args": { + "External id": 456562,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450671788.021, "dur": 2.121, + "args": { + "External id": 456563,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450671799.306, "dur": 3.469, + "args": { + "External id": 456564,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450671801.428, "dur": 0.536, + "args": { + "External id": 456565,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450671869.227, "dur": 55.242, + "args": { + "External id": 456566,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450671929.555, "dur": 4.835, + "args": { + "External id": 456567,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450671932.895, "dur": 0.522, + "args": { + "External id": 456568,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450671935.795, "dur": 23.315, + "args": { + "External id": 456569,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450671966.399, "dur": 4.880, + "args": { + "External id": 456570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450671967.655, "dur": 2.933, + "args": { + "External id": 456571,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450671969.593, "dur": 0.849, + "args": { + "External id": 456572,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450671973.803, "dur": 40.380, + "args": { + "External id": 456573,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450671974.846, "dur": 38.803, + "args": { + "External id": 456574,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672017.776, "dur": 13.458, + "args": { + "External id": 456575,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450672037.098, "dur": 5.661, + "args": { + "External id": 456576,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672041.497, "dur": 0.445, + "args": { + "External id": 456577,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450672047.128, "dur": 87.476, + "args": { + "External id": 456578,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450672048.297, "dur": 3.488, + "args": { + "External id": 456579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450672049.163, "dur": 2.060, + "args": { + "External id": 456580,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672050.642, "dur": 0.467, + "args": { + "External id": 456581,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450672052.469, "dur": 81.520, + "args": { + "External id": 456582,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450672055.360, "dur": 77.641, + "args": { + "External id": 456583,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450672140.514, "dur": 4.920, + "args": { + "External id": 456584,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672143.141, "dur": 0.761, + "args": { + "External id": 456585,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450672151.843, "dur": 1.656, + "args": { + "External id": 456586,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450672161.928, "dur": 9.203, + "args": { + "External id": 456587,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450672166.349, "dur": 4.480, + "args": { + "External id": 456588,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450672258.611, "dur": 178.689, + "args": { + "External id": 456589,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450672262.682, "dur": 3.533, + "args": { + "External id": 456590,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450672267.766, "dur": 169.039, + "args": { + "External id": 456591,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450672269.319, "dur": 0.358, + "args": { + "External id": 456592,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450672270.539, "dur": 20.298, + "args": { + "External id": 456593,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450672292.426, "dur": 4.229, + "args": { + "External id": 456594,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672295.950, "dur": 0.518, + "args": { + "External id": 456595,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450672300.026, "dur": 19.558, + "args": { + "External id": 456596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450672300.747, "dur": 1.023, + "args": { + "External id": 456597,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450672302.932, "dur": 16.349, + "args": { + "External id": 456598,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672305.691, "dur": 2.733, + "args": { + "External id": 456599,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450672320.928, "dur": 23.185, + "args": { + "External id": 456600,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672345.803, "dur": 14.596, + "args": { + "External id": 456601,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450672363.127, "dur": 12.714, + "args": { + "External id": 456602,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672377.303, "dur": 11.087, + "args": { + "External id": 456603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450672390.218, "dur": 21.625, + "args": { + "External id": 456604,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672392.203, "dur": 1.682, + "args": { + "External id": 456605,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672398.705, "dur": 0.625, + "args": { + "External id": 456606,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672412.986, "dur": 11.325, + "args": { + "External id": 456607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672425.394, "dur": 10.440, + "args": { + "External id": 456608,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450672443.610, "dur": 1.487, + "args": { + "External id": 456609,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450672454.096, "dur": 3.424, + "args": { + "External id": 456610,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672456.341, "dur": 0.381, + "args": { + "External id": 456611,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450672517.264, "dur": 50.165, + "args": { + "External id": 456612,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450672572.488, "dur": 7.169, + "args": { + "External id": 456613,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672577.885, "dur": 0.623, + "args": { + "External id": 456614,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672581.212, "dur": 43.104, + "args": { + "External id": 456615,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450672630.498, "dur": 6.389, + "args": { + "External id": 456616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450672632.248, "dur": 3.771, + "args": { + "External id": 456617,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672634.608, "dur": 1.266, + "args": { + "External id": 456618,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450672640.204, "dur": 47.849, + "args": { + "External id": 456619,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450672643.829, "dur": 43.528, + "args": { + "External id": 456620,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672691.754, "dur": 14.746, + "args": { + "External id": 456621,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450672711.066, "dur": 26.074, + "args": { + "External id": 456622,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450672713.778, "dur": 22.995, + "args": { + "External id": 456623,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672718.990, "dur": 2.294, + "args": { + "External id": 456624,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450672742.744, "dur": 25.768, + "args": { + "External id": 456625,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450672745.105, "dur": 23.193, + "args": { + "External id": 456626,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 15862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672750.279, "dur": 3.429, + "args": { + "External id": 456627,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672754.971, "dur": 12.871, + "args": { + "External id": 456628,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450672782.549, "dur": 5.555, + "args": { + "External id": 456629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450672784.786, "dur": 3.029, + "args": { + "External id": 456630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450672789.208, "dur": 1.304, + "args": { + "External id": 456631,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450672789.983, "dur": 0.461, + "args": { + "External id": 456632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672834.118, "dur": 24.368, + "args": { + "External id": 456633,"Sequence number": 5036582, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672863.288, "dur": 12.990, + "args": { + "External id": 456634,"Sequence number": 5036583, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15870 + } + }, + { + "ph": "s", "id": 9, "pid": 2338707, "tid": 2338707, "ts": 6267450672863.288, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450672882.597, "dur": 6.788, + "args": { + "External id": 456635,"Sequence number": 5036584, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672886.811, "dur": 1.014, + "args": { + "External id": 456636,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267450672892.079, "dur": 6.250, + "args": { + "External id": 456637,"Sequence number": 5036584, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672896.437, "dur": 0.573, + "args": { + "External id": 456638,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450672899.618, "dur": 4.433, + "args": { + "External id": 456639,"Sequence number": 5036584, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672903.232, "dur": 0.246, + "args": { + "External id": 456640,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450672908.266, "dur": 6.285, + "args": { + "External id": 456641,"Sequence number": 5036584, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15877 + } + }, + { + "ph": "s", "id": 8, "pid": 2338707, "tid": 2338707, "ts": 6267450672908.266, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672912.018, "dur": 1.106, + "args": { + "External id": 456642,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450672915.591, "dur": 4.398, + "args": { + "External id": 456643,"Sequence number": 5036585, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15879 + } + }, + { + "ph": "s", "id": 7, "pid": 2338707, "tid": 2338707, "ts": 6267450672915.591, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672918.742, "dur": 0.483, + "args": { + "External id": 456644,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267450672920.969, "dur": 11.549, + "args": { + "External id": 456645,"Sequence number": 5036586, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15881 + } + }, + { + "ph": "s", "id": 6, "pid": 2338707, "tid": 2338707, "ts": 6267450672920.969, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672930.987, "dur": 0.562, + "args": { + "External id": 456646,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450672933.478, "dur": 5.068, + "args": { + "External id": 456647,"Sequence number": 5036587, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15883 + } + }, + { + "ph": "s", "id": 5, "pid": 2338707, "tid": 2338707, "ts": 6267450672933.478, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450672935.678, "dur": 2.098, + "args": { + "External id": 456648,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267450672942.359, "dur": 32.646, + "args": { + "External id": 456649,"Sequence number": 5036588, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267450672944.022, "dur": 30.782, + "args": { + "External id": 456650,"Sequence number": 5036588, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450672946.677, "dur": 8.368, + "args": { + "External id": 456651,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450672948.838, "dur": 5.634, + "args": { + "External id": 456652,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450672955.976, "dur": 18.227, + "args": { + "External id": 456653,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450673004.290, "dur": 4.172, + "args": { + "External id": 456654,"Sequence number": 5036588, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15890 + } + }, + { + "ph": "s", "id": 4, "pid": 2338707, "tid": 2338707, "ts": 6267450673004.290, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450673010.951, "dur": 1.087, + "args": { + "External id": 456655,"Sequence number": 5036589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267450673045.648, "dur": 45861.797, + "args": { + "External id": 456656,"Sequence number": 5036589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15892 + } + }, + { + "ph": "s", "id": 3, "pid": 2338707, "tid": 2338707, "ts": 6267450673045.648, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267450673101.554, "dur": 36.604, + "args": { + "External id": 456657,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267450673102.299, "dur": 35.599, + "args": { + "External id": 456658,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450673103.945, "dur": 11.290, + "args": { + "External id": 456659,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450673107.727, "dur": 6.889, + "args": { + "External id": 456660,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450673115.862, "dur": 21.506, + "args": { + "External id": 456661,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450673156.364, "dur": 24.975, + "args": { + "External id": 456662,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450673157.508, "dur": 5.177, + "args": { + "External id": 456663,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450673159.052, "dur": 3.366, + "args": { + "External id": 456664,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450673163.972, "dur": 17.150, + "args": { + "External id": 456665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450673165.735, "dur": 14.955, + "args": { + "External id": 456666,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450673190.212, "dur": 22.166, + "args": { + "External id": 456667,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450673191.039, "dur": 4.969, + "args": { + "External id": 456668,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450673192.753, "dur": 2.985, + "args": { + "External id": 456669,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450673198.952, "dur": 13.240, + "args": { + "External id": 456670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450673199.563, "dur": 12.199, + "args": { + "External id": 456671,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267450673218.562, "dur": 19.695, + "args": { + "External id": 456672,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450673220.535, "dur": 4.691, + "args": { + "External id": 456673,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267450673226.164, "dur": 11.770, + "args": { + "External id": 456674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450673227.006, "dur": 10.608, + "args": { + "External id": 456675,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2338707, + "ts": 6267450673243.419, "dur": 27.387, + "args": { + "External id": 456676,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450673273.706, "dur": 52.381, + "args": { + "External id": 456677,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450673277.937, "dur": 47.796, + "args": { + "External id": 456678,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450673282.508, "dur": 1.265, + "args": { + "External id": 456679,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450673285.316, "dur": 24.024, + "args": { + "External id": 456680,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450673287.138, "dur": 22.028, + "args": { + "External id": 456681,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450673291.539, "dur": 2.645, + "args": { + "External id": 456682,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450673295.357, "dur": 13.440, + "args": { + "External id": 456683,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2338707, + "ts": 6267450673330.453, "dur": 39721.275, + "args": { + "External id": 456684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2338707, + "ts": 6267450673332.115, "dur": 39718.677, + "args": { + "External id": 456685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450713084.644, "dur": 7.566, + "args": { + "External id": 456686,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450713088.932, "dur": 1.321, + "args": { + "External id": 456687,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450713099.931, "dur": 109.853, + "args": { + "External id": 456688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450713101.497, "dur": 6.845, + "args": { + "External id": 456689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450713104.540, "dur": 2.969, + "args": { + "External id": 456690,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450713106.526, "dur": 0.709, + "args": { + "External id": 456691,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450713109.830, "dur": 99.190, + "args": { + "External id": 456692,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450713111.760, "dur": 96.333, + "args": { + "External id": 456693,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450713214.071, "dur": 4.072, + "args": { + "External id": 456694,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450713216.273, "dur": 0.507, + "args": { + "External id": 456695,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450713227.913, "dur": 2.207, + "args": { + "External id": 456696,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450713241.173, "dur": 6.607, + "args": { + "External id": 456697,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450713243.543, "dur": 3.942, + "args": { + "External id": 456698,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450713384.177, "dur": 249.876, + "args": { + "External id": 456699,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450713387.533, "dur": 2.143, + "args": { + "External id": 456700,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450713393.933, "dur": 239.406, + "args": { + "External id": 456701,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450713398.629, "dur": 0.536, + "args": { + "External id": 456702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450713400.827, "dur": 26.625, + "args": { + "External id": 456703,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450713429.307, "dur": 32.788, + "args": { + "External id": 456704,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450713459.361, "dur": 2.377, + "args": { + "External id": 456705,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450713463.404, "dur": 24.879, + "args": { + "External id": 456706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450713465.393, "dur": 1.046, + "args": { + "External id": 456707,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450713467.779, "dur": 20.248, + "args": { + "External id": 456708,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450713472.103, "dur": 3.253, + "args": { + "External id": 456709,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450713492.334, "dur": 23.693, + "args": { + "External id": 456710,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450713517.861, "dur": 13.798, + "args": { + "External id": 456711,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450713534.914, "dur": 15.324, + "args": { + "External id": 456712,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450713551.787, "dur": 12.820, + "args": { + "External id": 456713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450713566.288, "dur": 19.676, + "args": { + "External id": 456714,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450713568.746, "dur": 1.748, + "args": { + "External id": 456715,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450713572.581, "dur": 0.487, + "args": { + "External id": 456716,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450713589.726, "dur": 28.214, + "args": { + "External id": 456717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450713620.128, "dur": 11.620, + "args": { + "External id": 456718,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450713643.070, "dur": 2.448, + "args": { + "External id": 456719,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450713653.744, "dur": 4.260, + "args": { + "External id": 456720,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450713656.449, "dur": 0.459, + "args": { + "External id": 456721,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450713740.743, "dur": 76.417, + "args": { + "External id": 456722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450713823.001, "dur": 8.806, + "args": { + "External id": 456723,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450713826.258, "dur": 2.877, + "args": { + "External id": 456724,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450713835.663, "dur": 30.085, + "args": { + "External id": 456725,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450713871.362, "dur": 6.597, + "args": { + "External id": 456726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450713873.694, "dur": 3.428, + "args": { + "External id": 456727,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450713875.858, "dur": 0.868, + "args": { + "External id": 456728,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450713881.190, "dur": 45.973, + "args": { + "External id": 456729,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450713882.434, "dur": 44.066, + "args": { + "External id": 456730,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450713931.478, "dur": 16.654, + "args": { + "External id": 456731,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450713956.859, "dur": 4.049, + "args": { + "External id": 456732,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450713959.457, "dur": 0.393, + "args": { + "External id": 456733,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450713965.999, "dur": 49.340, + "args": { + "External id": 456734,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450713966.912, "dur": 3.599, + "args": { + "External id": 456735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450713967.843, "dur": 2.048, + "args": { + "External id": 456736,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450713969.044, "dur": 0.543, + "args": { + "External id": 456737,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450713973.379, "dur": 41.591, + "args": { + "External id": 456738,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450713974.002, "dur": 40.094, + "args": { + "External id": 456739,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450714019.593, "dur": 3.701, + "args": { + "External id": 456740,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450714021.729, "dur": 0.468, + "args": { + "External id": 456741,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450714028.995, "dur": 1.415, + "args": { + "External id": 456742,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450714039.809, "dur": 8.693, + "args": { + "External id": 456743,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450714043.906, "dur": 4.317, + "args": { + "External id": 456744,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450714191.799, "dur": 205.193, + "args": { + "External id": 456745,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450714194.274, "dur": 4.188, + "args": { + "External id": 456746,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450714200.722, "dur": 195.784, + "args": { + "External id": 456747,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450714202.590, "dur": 0.277, + "args": { + "External id": 456748,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450714204.364, "dur": 25.086, + "args": { + "External id": 456749,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450714231.343, "dur": 5.416, + "args": { + "External id": 456750,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450714235.618, "dur": 0.915, + "args": { + "External id": 456751,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450714239.876, "dur": 25.151, + "args": { + "External id": 456752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450714241.652, "dur": 1.266, + "args": { + "External id": 456753,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450714244.450, "dur": 20.326, + "args": { + "External id": 456754,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450714247.546, "dur": 2.549, + "args": { + "External id": 456755,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450714266.775, "dur": 24.428, + "args": { + "External id": 456756,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450714293.316, "dur": 15.237, + "args": { + "External id": 456757,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450714311.528, "dur": 13.712, + "args": { + "External id": 456758,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450714326.639, "dur": 13.331, + "args": { + "External id": 456759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450714341.690, "dur": 26.970, + "args": { + "External id": 456760,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450714346.146, "dur": 1.977, + "args": { + "External id": 456761,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450714352.600, "dur": 0.636, + "args": { + "External id": 456762,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450714370.222, "dur": 13.083, + "args": { + "External id": 456763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450714384.337, "dur": 10.993, + "args": { + "External id": 456764,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450714404.434, "dur": 2.128, + "args": { + "External id": 456765,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450714417.392, "dur": 4.434, + "args": { + "External id": 456766,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450714420.343, "dur": 0.445, + "args": { + "External id": 456767,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450714494.077, "dur": 59.780, + "args": { + "External id": 456768,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450714559.457, "dur": 8.149, + "args": { + "External id": 456769,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450714565.661, "dur": 0.621, + "args": { + "External id": 456770,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450714569.185, "dur": 42.805, + "args": { + "External id": 456771,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450714618.614, "dur": 6.412, + "args": { + "External id": 456772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450714620.327, "dur": 3.697, + "args": { + "External id": 456773,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450714622.503, "dur": 1.360, + "args": { + "External id": 456774,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450714637.344, "dur": 51.133, + "args": { + "External id": 456775,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450714640.641, "dur": 47.161, + "args": { + "External id": 456776,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450714692.541, "dur": 15.987, + "args": { + "External id": 456777,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450714715.484, "dur": 4.593, + "args": { + "External id": 456778,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450714718.244, "dur": 0.813, + "args": { + "External id": 456779,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450714724.779, "dur": 47.089, + "args": { + "External id": 456780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450714725.904, "dur": 5.976, + "args": { + "External id": 456781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450714726.768, "dur": 4.481, + "args": { + "External id": 456782,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450714730.529, "dur": 0.598, + "args": { + "External id": 456783,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450714732.494, "dur": 38.985, + "args": { + "External id": 456784,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450714733.155, "dur": 37.658, + "args": { + "External id": 456785,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450714776.393, "dur": 3.695, + "args": { + "External id": 456786,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450714778.500, "dur": 0.517, + "args": { + "External id": 456787,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450714786.253, "dur": 1.471, + "args": { + "External id": 456788,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450714797.078, "dur": 11.293, + "args": { + "External id": 456789,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450714801.480, "dur": 6.624, + "args": { + "External id": 456790,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450714901.508, "dur": 219.655, + "args": { + "External id": 456791,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450714903.394, "dur": 2.038, + "args": { + "External id": 456792,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450714907.350, "dur": 213.105, + "args": { + "External id": 456793,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450714908.856, "dur": 0.306, + "args": { + "External id": 456794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450714910.148, "dur": 22.066, + "args": { + "External id": 456795,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450714933.850, "dur": 3.265, + "args": { + "External id": 456796,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450714936.125, "dur": 0.677, + "args": { + "External id": 456797,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450714940.451, "dur": 22.088, + "args": { + "External id": 456798,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450714942.102, "dur": 1.060, + "args": { + "External id": 456799,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450714944.518, "dur": 17.700, + "args": { + "External id": 456800,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450714948.898, "dur": 2.580, + "args": { + "External id": 456801,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450714964.071, "dur": 23.924, + "args": { + "External id": 456802,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450714989.403, "dur": 12.422, + "args": { + "External id": 456803,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450715004.462, "dur": 13.395, + "args": { + "External id": 456804,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715019.286, "dur": 11.048, + "args": { + "External id": 456805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450715032.165, "dur": 56.228, + "args": { + "External id": 456806,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715036.212, "dur": 1.668, + "args": { + "External id": 456807,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450715040.613, "dur": 0.629, + "args": { + "External id": 456808,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715091.190, "dur": 15.192, + "args": { + "External id": 456809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715107.635, "dur": 11.551, + "args": { + "External id": 456810,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450715130.142, "dur": 2.469, + "args": { + "External id": 456811,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450715142.382, "dur": 4.122, + "args": { + "External id": 456812,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450715144.980, "dur": 0.567, + "args": { + "External id": 456813,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450715228.350, "dur": 66.578, + "args": { + "External id": 456814,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450715300.684, "dur": 4.385, + "args": { + "External id": 456815,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450715303.108, "dur": 0.790, + "args": { + "External id": 456816,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715306.473, "dur": 27.841, + "args": { + "External id": 456817,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450715339.198, "dur": 9.066, + "args": { + "External id": 456818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450715340.911, "dur": 6.377, + "args": { + "External id": 456819,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450715346.138, "dur": 0.957, + "args": { + "External id": 456820,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450715351.049, "dur": 42.544, + "args": { + "External id": 456821,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450715352.255, "dur": 40.798, + "args": { + "External id": 456822,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715397.344, "dur": 14.757, + "args": { + "External id": 456823,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450715417.951, "dur": 5.758, + "args": { + "External id": 456824,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450715422.341, "dur": 0.345, + "args": { + "External id": 456825,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450715427.884, "dur": 44.580, + "args": { + "External id": 456826,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450715431.052, "dur": 3.011, + "args": { + "External id": 456827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450715431.862, "dur": 1.657, + "args": { + "External id": 456828,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450715432.743, "dur": 0.625, + "args": { + "External id": 456829,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450715434.591, "dur": 37.369, + "args": { + "External id": 456830,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450715435.230, "dur": 36.135, + "args": { + "External id": 456831,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450715476.331, "dur": 5.438, + "args": { + "External id": 456832,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450715478.088, "dur": 2.473, + "args": { + "External id": 456833,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450715488.926, "dur": 1.360, + "args": { + "External id": 456834,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450715498.693, "dur": 8.320, + "args": { + "External id": 456835,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450715500.627, "dur": 6.119, + "args": { + "External id": 456836,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450715590.676, "dur": 201.972, + "args": { + "External id": 456837,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450715609.282, "dur": 2.913, + "args": { + "External id": 456838,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450715616.183, "dur": 176.015, + "args": { + "External id": 456839,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450715617.512, "dur": 0.383, + "args": { + "External id": 456840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450715619.264, "dur": 25.749, + "args": { + "External id": 456841,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450715646.713, "dur": 3.760, + "args": { + "External id": 456842,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450715649.236, "dur": 0.979, + "args": { + "External id": 456843,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450715651.568, "dur": 23.481, + "args": { + "External id": 456844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450715652.611, "dur": 1.164, + "args": { + "External id": 456845,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450715655.151, "dur": 19.600, + "args": { + "External id": 456846,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715660.888, "dur": 2.514, + "args": { + "External id": 456847,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450715676.414, "dur": 20.115, + "args": { + "External id": 456848,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715697.990, "dur": 13.884, + "args": { + "External id": 456849,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450715716.905, "dur": 11.852, + "args": { + "External id": 456850,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715730.267, "dur": 11.784, + "args": { + "External id": 456851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450715743.915, "dur": 18.291, + "args": { + "External id": 456852,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715746.139, "dur": 1.576, + "args": { + "External id": 456853,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450715749.571, "dur": 0.536, + "args": { + "External id": 456854,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715763.687, "dur": 12.727, + "args": { + "External id": 456855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715779.965, "dur": 11.334, + "args": { + "External id": 456856,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450715800.467, "dur": 2.167, + "args": { + "External id": 456857,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450715812.428, "dur": 3.614, + "args": { + "External id": 456858,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450715814.876, "dur": 0.348, + "args": { + "External id": 456859,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450715888.151, "dur": 61.891, + "args": { + "External id": 456860,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450715955.408, "dur": 4.797, + "args": { + "External id": 456861,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450715958.462, "dur": 0.658, + "args": { + "External id": 456862,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450715961.691, "dur": 27.058, + "args": { + "External id": 456863,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450715994.148, "dur": 8.061, + "args": { + "External id": 456864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450715998.195, "dur": 3.321, + "args": { + "External id": 456865,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450716000.494, "dur": 0.854, + "args": { + "External id": 456866,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450716005.166, "dur": 43.089, + "args": { + "External id": 456867,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450716006.404, "dur": 41.217, + "args": { + "External id": 456868,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450716100.094, "dur": 20.539, + "args": { + "External id": 456869,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450716130.716, "dur": 6.972, + "args": { + "External id": 456870,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450716135.642, "dur": 0.689, + "args": { + "External id": 456871,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450716142.677, "dur": 55.935, + "args": { + "External id": 456872,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450716143.739, "dur": 6.123, + "args": { + "External id": 456873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450716144.869, "dur": 4.355, + "args": { + "External id": 456874,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450716146.323, "dur": 2.710, + "args": { + "External id": 456875,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450716150.611, "dur": 47.505, + "args": { + "External id": 456876,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450716151.378, "dur": 46.237, + "args": { + "External id": 456877,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450716203.529, "dur": 3.994, + "args": { + "External id": 456878,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450716205.895, "dur": 0.473, + "args": { + "External id": 456879,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450716216.960, "dur": 1.662, + "args": { + "External id": 456880,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450716227.909, "dur": 7.331, + "args": { + "External id": 456881,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450716230.298, "dur": 4.634, + "args": { + "External id": 456882,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450716330.751, "dur": 179.962, + "args": { + "External id": 456883,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450716333.462, "dur": 1.868, + "args": { + "External id": 456884,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450716339.062, "dur": 171.205, + "args": { + "External id": 456885,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450716340.761, "dur": 0.641, + "args": { + "External id": 456886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450716344.682, "dur": 20.315, + "args": { + "External id": 456887,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450716366.767, "dur": 3.355, + "args": { + "External id": 456888,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450716369.269, "dur": 0.628, + "args": { + "External id": 456889,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450716371.160, "dur": 22.544, + "args": { + "External id": 456890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450716372.169, "dur": 2.485, + "args": { + "External id": 456891,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450716376.114, "dur": 17.287, + "args": { + "External id": 456892,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450716378.939, "dur": 3.146, + "args": { + "External id": 456893,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450716397.569, "dur": 21.625, + "args": { + "External id": 456894,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450716420.814, "dur": 12.060, + "args": { + "External id": 456895,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450716435.944, "dur": 13.699, + "args": { + "External id": 456896,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450716451.022, "dur": 11.294, + "args": { + "External id": 456897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450716464.036, "dur": 19.154, + "args": { + "External id": 456898,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450716466.136, "dur": 1.667, + "args": { + "External id": 456899,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450716470.022, "dur": 0.752, + "args": { + "External id": 456900,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450716486.683, "dur": 11.440, + "args": { + "External id": 456901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450716499.159, "dur": 10.084, + "args": { + "External id": 456902,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450716517.286, "dur": 1.842, + "args": { + "External id": 456903,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450716528.329, "dur": 3.920, + "args": { + "External id": 456904,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450716531.047, "dur": 0.421, + "args": { + "External id": 456905,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450716614.150, "dur": 60.990, + "args": { + "External id": 456906,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450716681.189, "dur": 7.804, + "args": { + "External id": 456907,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450716684.239, "dur": 3.175, + "args": { + "External id": 456908,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450716692.875, "dur": 36.621, + "args": { + "External id": 456909,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450716734.861, "dur": 5.818, + "args": { + "External id": 456910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450716736.947, "dur": 2.817, + "args": { + "External id": 456911,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450716738.756, "dur": 0.798, + "args": { + "External id": 456912,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450716743.209, "dur": 43.055, + "args": { + "External id": 456913,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450716744.434, "dur": 41.248, + "args": { + "External id": 456914,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450716790.229, "dur": 17.347, + "args": { + "External id": 456915,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450716815.923, "dur": 3.582, + "args": { + "External id": 456916,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450716818.297, "dur": 0.357, + "args": { + "External id": 456917,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450716823.594, "dur": 46.584, + "args": { + "External id": 456918,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450716824.742, "dur": 3.424, + "args": { + "External id": 456919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450716825.329, "dur": 2.301, + "args": { + "External id": 456920,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450716826.883, "dur": 0.640, + "args": { + "External id": 456921,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450716831.260, "dur": 38.564, + "args": { + "External id": 456922,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450716831.975, "dur": 37.155, + "args": { + "External id": 456923,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450716874.074, "dur": 3.238, + "args": { + "External id": 456924,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450716875.784, "dur": 0.507, + "args": { + "External id": 456925,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450716883.720, "dur": 1.616, + "args": { + "External id": 456926,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450716893.214, "dur": 11.193, + "args": { + "External id": 456927,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450716897.681, "dur": 6.483, + "args": { + "External id": 456928,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450716993.363, "dur": 220.406, + "args": { + "External id": 456929,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450716995.484, "dur": 3.582, + "args": { + "External id": 456930,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450717000.410, "dur": 212.828, + "args": { + "External id": 456931,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450717001.836, "dur": 0.681, + "args": { + "External id": 456932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450717004.086, "dur": 21.013, + "args": { + "External id": 456933,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450717026.651, "dur": 4.502, + "args": { + "External id": 456934,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450717030.374, "dur": 0.521, + "args": { + "External id": 456935,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450717034.326, "dur": 58.545, + "args": { + "External id": 456936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450717035.637, "dur": 0.982, + "args": { + "External id": 456937,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450717037.817, "dur": 54.372, + "args": { + "External id": 456938,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717040.516, "dur": 2.395, + "args": { + "External id": 456939,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450717094.998, "dur": 24.284, + "args": { + "External id": 456940,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717120.720, "dur": 12.918, + "args": { + "External id": 456941,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450717136.464, "dur": 11.714, + "args": { + "External id": 456942,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717149.783, "dur": 11.541, + "args": { + "External id": 456943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450717163.271, "dur": 24.100, + "args": { + "External id": 456944,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717165.744, "dur": 1.957, + "args": { + "External id": 456945,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450717172.299, "dur": 2.636, + "args": { + "External id": 456946,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717188.856, "dur": 11.493, + "args": { + "External id": 456947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717201.431, "dur": 10.360, + "args": { + "External id": 456948,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450717222.599, "dur": 2.123, + "args": { + "External id": 456949,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450717234.450, "dur": 3.881, + "args": { + "External id": 456950,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450717236.837, "dur": 0.521, + "args": { + "External id": 456951,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450717307.733, "dur": 57.163, + "args": { + "External id": 456952,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450717370.762, "dur": 7.381, + "args": { + "External id": 456953,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450717376.476, "dur": 0.607, + "args": { + "External id": 456954,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717379.372, "dur": 24.406, + "args": { + "External id": 456955,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450717408.483, "dur": 5.849, + "args": { + "External id": 456956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450717410.352, "dur": 3.215, + "args": { + "External id": 456957,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450717412.217, "dur": 1.170, + "args": { + "External id": 456958,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450717416.845, "dur": 44.004, + "args": { + "External id": 456959,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450717420.334, "dur": 39.633, + "args": { + "External id": 456960,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717464.746, "dur": 13.693, + "args": { + "External id": 456961,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450717483.679, "dur": 3.514, + "args": { + "External id": 456962,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450717485.841, "dur": 0.416, + "args": { + "External id": 456963,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450717491.180, "dur": 45.716, + "args": { + "External id": 456964,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450717492.189, "dur": 5.932, + "args": { + "External id": 456965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450717493.367, "dur": 4.149, + "args": { + "External id": 456966,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450717497.041, "dur": 0.331, + "args": { + "External id": 456967,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450717499.113, "dur": 37.361, + "args": { + "External id": 456968,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450717499.662, "dur": 36.266, + "args": { + "External id": 456969,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450717541.476, "dur": 5.580, + "args": { + "External id": 456970,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450717543.659, "dur": 2.298, + "args": { + "External id": 456971,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450717552.738, "dur": 1.347, + "args": { + "External id": 456972,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450717561.990, "dur": 7.802, + "args": { + "External id": 456973,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450717566.006, "dur": 3.540, + "args": { + "External id": 456974,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450717672.053, "dur": 176.990, + "args": { + "External id": 456975,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450717675.979, "dur": 3.011, + "args": { + "External id": 456976,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450717680.561, "dur": 167.695, + "args": { + "External id": 456977,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450717681.980, "dur": 0.551, + "args": { + "External id": 456978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450717683.509, "dur": 20.727, + "args": { + "External id": 456979,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450717705.806, "dur": 4.999, + "args": { + "External id": 456980,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450717709.574, "dur": 0.959, + "args": { + "External id": 456981,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450717714.036, "dur": 20.561, + "args": { + "External id": 456982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450717715.170, "dur": 0.941, + "args": { + "External id": 456983,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450717717.253, "dur": 16.963, + "args": { + "External id": 456984,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717719.686, "dur": 2.930, + "args": { + "External id": 456985,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450717735.971, "dur": 20.799, + "args": { + "External id": 456986,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717758.377, "dur": 14.168, + "args": { + "External id": 456987,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450717775.503, "dur": 13.656, + "args": { + "External id": 456988,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717790.467, "dur": 11.193, + "args": { + "External id": 456989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450717803.186, "dur": 20.717, + "args": { + "External id": 456990,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717807.860, "dur": 1.399, + "args": { + "External id": 456991,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450717811.009, "dur": 0.616, + "args": { + "External id": 456992,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717824.995, "dur": 10.829, + "args": { + "External id": 456993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717837.038, "dur": 10.320, + "args": { + "External id": 456994,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450717855.828, "dur": 2.115, + "args": { + "External id": 456995,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450717867.101, "dur": 3.943, + "args": { + "External id": 456996,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450717869.552, "dur": 0.667, + "args": { + "External id": 456997,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450717931.959, "dur": 54.303, + "args": { + "External id": 456998,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450717991.502, "dur": 4.698, + "args": { + "External id": 456999,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450717994.452, "dur": 0.658, + "args": { + "External id": 457000,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450717997.663, "dur": 24.824, + "args": { + "External id": 457001,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450718027.294, "dur": 7.620, + "args": { + "External id": 457002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450718028.914, "dur": 5.302, + "args": { + "External id": 457003,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450718032.874, "dur": 1.164, + "args": { + "External id": 457004,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450718037.565, "dur": 78.654, + "args": { + "External id": 457005,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450718038.895, "dur": 76.058, + "args": { + "External id": 457006,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450718122.121, "dur": 15.564, + "args": { + "External id": 457007,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450718144.750, "dur": 5.023, + "args": { + "External id": 457008,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450718147.709, "dur": 0.862, + "args": { + "External id": 457009,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267450718154.192, "dur": 49.564, + "args": { + "External id": 457010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450718157.601, "dur": 4.100, + "args": { + "External id": 457011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450718158.971, "dur": 2.184, + "args": { + "External id": 457012,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450718160.607, "dur": 0.421, + "args": { + "External id": 457013,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450718162.377, "dur": 41.010, + "args": { + "External id": 457014,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450718163.010, "dur": 39.669, + "args": { + "External id": 457015,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450718208.320, "dur": 3.687, + "args": { + "External id": 457016,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450718210.315, "dur": 0.670, + "args": { + "External id": 457017,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450718220.187, "dur": 1.360, + "args": { + "External id": 457018,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450718229.289, "dur": 9.069, + "args": { + "External id": 457019,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450718231.254, "dur": 6.767, + "args": { + "External id": 457020,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450718321.772, "dur": 194.045, + "args": { + "External id": 457021,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450718323.772, "dur": 4.002, + "args": { + "External id": 457022,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267450718331.501, "dur": 183.778, + "args": { + "External id": 457023,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267450718332.985, "dur": 0.376, + "args": { + "External id": 457024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267450718334.415, "dur": 27.842, + "args": { + "External id": 457025,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267450718364.010, "dur": 2.836, + "args": { + "External id": 457026,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450718365.878, "dur": 0.783, + "args": { + "External id": 457027,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450718367.841, "dur": 23.290, + "args": { + "External id": 457028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267450718369.520, "dur": 1.257, + "args": { + "External id": 457029,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267450718371.917, "dur": 18.824, + "args": { + "External id": 457030,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450718375.986, "dur": 2.387, + "args": { + "External id": 457031,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267450718392.529, "dur": 20.905, + "args": { + "External id": 457032,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450718415.012, "dur": 15.122, + "args": { + "External id": 457033,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267450718435.169, "dur": 14.459, + "args": { + "External id": 457034,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267450718451.195, "dur": 13.376, + "args": { + "External id": 457035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450718466.321, "dur": 19.941, + "args": { + "External id": 457036,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267450718468.705, "dur": 1.390, + "args": { + "External id": 457037,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450718471.969, "dur": 0.346, + "args": { + "External id": 457038,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267450718487.543, "dur": 13.002, + "args": { + "External id": 457039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450718503.823, "dur": 10.481, + "args": { + "External id": 457040,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267450718522.301, "dur": 1.538, + "args": { + "External id": 457041,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450718532.148, "dur": 3.600, + "args": { + "External id": 457042,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450718534.403, "dur": 0.529, + "args": { + "External id": 457043,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450718616.965, "dur": 57.660, + "args": { + "External id": 457044,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267450718680.452, "dur": 6.118, + "args": { + "External id": 457045,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450718684.213, "dur": 0.850, + "args": { + "External id": 457046,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450718687.787, "dur": 27.482, + "args": { + "External id": 457047,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267450718720.090, "dur": 7.356, + "args": { + "External id": 457048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267450718723.862, "dur": 2.727, + "args": { + "External id": 457049,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450718725.690, "dur": 0.672, + "args": { + "External id": 457050,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267450718730.111, "dur": 41.803, + "args": { + "External id": 457051,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267450718730.999, "dur": 40.152, + "args": { + "External id": 457052,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450718775.639, "dur": 16.504, + "args": { + "External id": 457053,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450718796.442, "dur": 26.333, + "args": { + "External id": 457054,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 16290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267450718799.039, "dur": 23.292, + "args": { + "External id": 457055,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450718805.584, "dur": 0.520, + "args": { + "External id": 457056,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 16292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267450718827.873, "dur": 32.877, + "args": { + "External id": 457057,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 16293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267450718832.382, "dur": 28.108, + "args": { + "External id": 457058,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 16294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450718837.451, "dur": 3.656, + "args": { + "External id": 457059,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267450718842.263, "dur": 17.810, + "args": { + "External id": 457060,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450718872.904, "dur": 5.572, + "args": { + "External id": 457061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 16297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450718875.068, "dur": 3.117, + "args": { + "External id": 457062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 16298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450718879.664, "dur": 3.621, + "args": { + "External id": 457063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267450718882.503, "dur": 0.640, + "args": { + "External id": 457064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450718927.219, "dur": 22.849, + "args": { + "External id": 457065,"Sequence number": 5036590, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 16301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267450718952.225, "dur": 14.957, + "args": { + "External id": 457066,"Sequence number": 5036591, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 16302 + } + }, + { + "ph": "s", "id": 2, "pid": 2338707, "tid": 2338707, "ts": 6267450718952.225, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267450719117.903, "dur": 42.383, + "args": { + "External id": 457067,"Record function id": 0, "Ev Idx": 16303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338707, "tid": 2338707, + "ts": 6267450719269.905, "dur": 42.820, + "args": { + "External id": 457068,"Sequence number": 5036592, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16304 + } + }, + { + "ph": "s", "id": 1, "pid": 2338707, "tid": 2338707, "ts": 6267450719269.905, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450719393.196, "dur": 39.044, + "args": { + "External id": 457069,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 16305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267450719403.435, "dur": 9.768, + "args": { + "External id": 457070,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 16306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267450719408.025, "dur": 4.546, + "args": { + "External id": 457071,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267450719414.815, "dur": 16.969, + "args": { + "External id": 457072,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338707, "tid": 2338707, + "ts": 6267452812900.423, "dur": 73.262, + "args": { + "External id": 457073,"Sequence number": 5036593, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 16309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338707, "tid": 2338707, + "ts": 6267452812986.052, "dur": 19.644, + "args": { + "External id": 457074,"Sequence number": 5036594, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16310 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2338707, "tid": 2338707, + "ts": 6267452813054.369, "dur": 3590.081, + "args": { + "External id": 457075,"Record function id": 0, "Ev Idx": 16311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452814809.561, "dur": 8.338, + "args": { + "External id": 457076,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338707, "tid": 2338707, + "ts": 6267452814832.975, "dur": 8.150, + "args": { + "External id": 457077,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 16313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452815333.714, "dur": 2.421, + "args": { + "External id": 457078,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338707, "tid": 2338707, + "ts": 6267452815341.546, "dur": 3.080, + "args": { + "External id": 457079,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 16315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452816343.420, "dur": 10.640, + "args": { + "External id": 457080,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338707, "tid": 2338707, + "ts": 6267452816374.838, "dur": 8.252, + "args": { + "External id": 457081,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 16317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267452817024.784, "dur": 18.180, + "args": { + "External id": 457082,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 16318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452817035.383, "dur": 2.953, + "args": { + "External id": 457083,"Record function id": 0, "Concrete Inputs": ["", "[8, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 16319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267452817046.040, "dur": 5.465, + "args": { + "External id": 457084,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 16320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452817048.406, "dur": 1.589, + "args": { + "External id": 457085,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 16321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267452817144.885, "dur": 14007.176, + "args": { + "External id": 457086,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], [], []], "Ev Idx": 16322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452817154.458, "dur": 13995.648, + "args": { + "External id": 457087,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 16323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452817163.343, "dur": 15.685, + "args": { + "External id": 457088,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452817181.237, "dur": 13964.942, + "args": { + "External id": 457089,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267452817192.940, "dur": 0.725, + "args": { + "External id": 457090,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 16326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338707, "tid": 2338707, + "ts": 6267452817197.505, "dur": 13.626, + "args": { + "External id": 457091,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[8, 4096], [8, 4096]], "Ev Idx": 16327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338707, "tid": 2338707, + "ts": 6267452817200.960, "dur": 9.966, + "args": { + "External id": 457092,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[8, 4096], [], []], "Ev Idx": 16328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452817209.560, "dur": 1.065, + "args": { + "External id": 457093,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267452817213.470, "dur": 162.609, + "args": { + "External id": 457094,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 16330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267452817215.638, "dur": 160.172, + "args": { + "External id": 457095,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 16331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267452817218.312, "dur": 8.868, + "args": { + "External id": 457096,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 16332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452817221.297, "dur": 5.304, + "args": { + "External id": 457097,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452817231.455, "dur": 143.935, + "args": { + "External id": 457098,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452817378.117, "dur": 13757.523, + "args": { + "External id": 457099,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267452831188.186, "dur": 372.848, + "args": { + "External id": 457100,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 16336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452831192.224, "dur": 367.927, + "args": { + "External id": 457101,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], []], "Ev Idx": 16337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452831203.113, "dur": 14.211, + "args": { + "External id": 457102,"Record function id": 0, "Concrete Inputs": ["[8, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452831220.065, "dur": 338.356, + "args": { + "External id": 457103,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[8, 8192], [8, 8192], []], "Ev Idx": 16339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338707, "tid": 2338707, + "ts": 6267452831589.236, "dur": 83.315, + "args": { + "External id": 457104,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452831614.340, "dur": 6.839, + "args": { + "External id": 457105,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338707, "tid": 2338707, + "ts": 6267452831624.559, "dur": 47.609, + "args": { + "External id": 457106,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 16342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267452831629.659, "dur": 9.660, + "args": { + "External id": 457107,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2338707, "tid": 2338707, + "ts": 6267452831689.210, "dur": 87.472, + "args": { + "External id": 457108,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338707, "tid": 2338707, + "ts": 6267452831694.542, "dur": 10.327, + "args": { + "External id": 457109,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 16345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452831699.178, "dur": 5.317, + "args": { + "External id": 457110,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452831706.874, "dur": 3.452, + "args": { + "External id": 457111,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2338707, + "ts": 6267452831712.501, "dur": 2.269, + "args": { + "External id": 457112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[8, 4096]], "Ev Idx": 16348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338707, "tid": 2338707, + "ts": 6267452831721.159, "dur": 8.240, + "args": { + "External id": 457113,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452831728.313, "dur": 0.919, + "args": { + "External id": 457114,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338707, "tid": 2338707, + "ts": 6267452831730.587, "dur": 4.336, + "args": { + "External id": 457115,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 16351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452831733.914, "dur": 0.908, + "args": { + "External id": 457116,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 16352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338707, "tid": 2338707, + "ts": 6267452831737.349, "dur": 5.632, + "args": { + "External id": 457117,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [8, 1, 1, 4096]], "Ev Idx": 16353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338707, "tid": 2338707, + "ts": 6267452831739.307, "dur": 3.499, + "args": { + "External id": 457118,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 16354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452831741.558, "dur": 1.134, + "args": { + "External id": 457119,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 16355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452831744.343, "dur": 31.406, + "args": { + "External id": 457120,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[8, 1, 1, 4096], [8, 1, 1, 4096], []], "Ev Idx": 16356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267452831787.021, "dur": 35.894, + "args": { + "External id": 457121,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 16357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452831788.958, "dur": 33.735, + "args": { + "External id": 457122,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 16358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452831797.593, "dur": 3.970, + "args": { + "External id": 457123,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452831802.555, "dur": 19.578, + "args": { + "External id": 457124,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16360 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267452831998.489, "dur": 250.486, + "args": { + "External id": 457125,"Record function id": 0, "Ev Idx": 16361 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2338707, "tid": 2338707, + "ts": 6267452832161.827, "dur": 73.958, + "args": { + "External id": 457126,"Record function id": 0, "Ev Idx": 16362 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267452832257.977, "dur": 63.006, + "args": { + "External id": 457127,"Record function id": 0, "Ev Idx": 16363 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267452832331.501, "dur": 14596.526, + "args": { + "External id": 457128,"Record function id": 0, "Ev Idx": 16364 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2338707, "tid": 2338707, + "ts": 6267452832340.681, "dur": 2380.313, + "args": { + "External id": 457129,"Record function id": 0, "Ev Idx": 16365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452832479.632, "dur": 13.040, + "args": { + "External id": 457130,"Record function id": 0, "Concrete Inputs": ["[141824512]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452832513.422, "dur": 211.043, + "args": { + "External id": 457131,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 16367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832520.596, "dur": 5.161, + "args": { + "External id": 457132,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832531.578, "dur": 0.544, + "args": { + "External id": 457133,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832533.745, "dur": 0.470, + "args": { + "External id": 457134,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "16384512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832536.323, "dur": 3.141, + "args": { + "External id": 457135,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "18481664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832541.873, "dur": 0.528, + "args": { + "External id": 457136,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19005952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832544.475, "dur": 0.564, + "args": { + "External id": 457137,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "19530240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832550.055, "dur": 0.370, + "args": { + "External id": 457138,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832552.161, "dur": 0.556, + "args": { + "External id": 457139,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "21627904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832554.303, "dur": 3.092, + "args": { + "External id": 457140,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "28967936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832561.466, "dur": 0.349, + "args": { + "External id": 457141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "36307968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832564.328, "dur": 0.496, + "args": { + "External id": 457142,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832566.850, "dur": 3.096, + "args": { + "External id": 457143,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "43648512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832571.835, "dur": 0.451, + "args": { + "External id": 457144,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45745664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832574.391, "dur": 0.490, + "args": { + "External id": 457145,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46269952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832579.987, "dur": 0.628, + "args": { + "External id": 457146,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "46794240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832582.332, "dur": 0.556, + "args": { + "External id": 457147,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832584.742, "dur": 3.113, + "args": { + "External id": 457148,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "48891904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832611.536, "dur": 0.745, + "args": { + "External id": 457149,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "56231936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832614.733, "dur": 0.752, + "args": { + "External id": 457150,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "63571968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832617.626, "dur": 2.526, + "args": { + "External id": 457151,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832622.413, "dur": 0.601, + "args": { + "External id": 457152,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "70912512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832624.961, "dur": 0.601, + "args": { + "External id": 457153,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73009664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832630.690, "dur": 0.556, + "args": { + "External id": 457154,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73533952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832633.964, "dur": 0.554, + "args": { + "External id": 457155,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "74058240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832636.580, "dur": 3.667, + "args": { + "External id": 457156,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832644.407, "dur": 0.244, + "args": { + "External id": 457157,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "76155904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832646.098, "dur": 0.678, + "args": { + "External id": 457158,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "83495936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832649.040, "dur": 2.815, + "args": { + "External id": 457159,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "90835968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832654.022, "dur": 0.539, + "args": { + "External id": 457160,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832656.806, "dur": 0.481, + "args": { + "External id": 457161,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "98176512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832662.353, "dur": 0.262, + "args": { + "External id": 457162,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100273664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832664.467, "dur": 0.565, + "args": { + "External id": 457163,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100797952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832667.159, "dur": 3.647, + "args": { + "External id": 457164,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "101322240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832674.816, "dur": 0.360, + "args": { + "External id": 457165,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832676.930, "dur": 0.474, + "args": { + "External id": 457166,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "103419904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832679.301, "dur": 2.757, + "args": { + "External id": 457167,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "110759936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832684.091, "dur": 0.584, + "args": { + "External id": 457168,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "118099968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832687.095, "dur": 0.592, + "args": { + "External id": 457169,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452832691.756, "dur": 0.493, + "args": { + "External id": 457170,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "125440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452832766.849, "dur": 176.695, + "args": { + "External id": 457171,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267452833051.091, "dur": 681.830, + "args": { + "External id": 457172,"Record function id": 0, "Concrete Inputs": ["", "", "141824512", "8", "1", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 16408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452833130.726, "dur": 15.254, + "args": { + "External id": 457173,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267452833155.336, "dur": 23.694, + "args": { + "External id": 457174,"Record function id": 0, "Concrete Inputs": ["", "0", "141824512", "141824512"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 16410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267452833163.834, "dur": 14.704, + "args": { + "External id": 457175,"Record function id": 0, "Concrete Inputs": ["", "0", "141824512", "283649024", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[1134596096], [], [], [], []], "Ev Idx": 16411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833169.084, "dur": 3.662, + "args": { + "External id": 457176,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "[1]", "141824512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 16412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452833192.290, "dur": 184.321, + "args": { + "External id": 457177,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 16413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833196.033, "dur": 0.863, + "args": { + "External id": 457178,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "141824512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833199.857, "dur": 3.040, + "args": { + "External id": 457179,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "158208512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833204.589, "dur": 0.772, + "args": { + "External id": 457180,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "158209024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833207.229, "dur": 0.593, + "args": { + "External id": 457181,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "160306176"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833212.172, "dur": 0.312, + "args": { + "External id": 457182,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "160830464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833214.693, "dur": 0.497, + "args": { + "External id": 457183,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "161354752"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833217.056, "dur": 0.480, + "args": { + "External id": 457184,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "163451904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833221.895, "dur": 3.537, + "args": { + "External id": 457185,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "163452416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833227.342, "dur": 0.858, + "args": { + "External id": 457186,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "170792448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833230.016, "dur": 3.788, + "args": { + "External id": 457187,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "178132480"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833235.700, "dur": 0.530, + "args": { + "External id": 457188,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "185472512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833238.277, "dur": 0.312, + "args": { + "External id": 457189,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "185473024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833242.709, "dur": 0.396, + "args": { + "External id": 457190,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "187570176"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833244.784, "dur": 0.586, + "args": { + "External id": 457191,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "188094464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833247.433, "dur": 0.496, + "args": { + "External id": 457192,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "188618752"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833252.161, "dur": 3.273, + "args": { + "External id": 457193,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "190715904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833256.966, "dur": 0.831, + "args": { + "External id": 457194,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "190716416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833259.593, "dur": 3.150, + "args": { + "External id": 457195,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "198056448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833264.226, "dur": 0.765, + "args": { + "External id": 457196,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "205396480"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833266.737, "dur": 0.454, + "args": { + "External id": 457197,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "212736512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833271.393, "dur": 0.288, + "args": { + "External id": 457198,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "212737024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833273.531, "dur": 0.580, + "args": { + "External id": 457199,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "214834176"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833275.717, "dur": 0.542, + "args": { + "External id": 457200,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "215358464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833280.263, "dur": 3.223, + "args": { + "External id": 457201,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "215882752"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833284.976, "dur": 0.508, + "args": { + "External id": 457202,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "217979904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833286.829, "dur": 3.557, + "args": { + "External id": 457203,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "217980416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833292.605, "dur": 0.586, + "args": { + "External id": 457204,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "225320448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833295.516, "dur": 0.415, + "args": { + "External id": 457205,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "232660480"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833300.893, "dur": 0.611, + "args": { + "External id": 457206,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "240000512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833303.963, "dur": 0.735, + "args": { + "External id": 457207,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "240001024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833306.902, "dur": 0.596, + "args": { + "External id": 457208,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "242098176"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833311.718, "dur": 3.513, + "args": { + "External id": 457209,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "242622464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833317.823, "dur": 0.668, + "args": { + "External id": 457210,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "243146752"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833320.944, "dur": 3.580, + "args": { + "External id": 457211,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "245243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833326.746, "dur": 0.793, + "args": { + "External id": 457212,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "245244416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833330.350, "dur": 0.506, + "args": { + "External id": 457213,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "252584448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833336.290, "dur": 0.712, + "args": { + "External id": 457214,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "259924480"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833339.896, "dur": 0.555, + "args": { + "External id": 457215,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "267264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452833343.324, "dur": 0.966, + "args": { + "External id": 457216,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "267265024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452833466.381, "dur": 228.331, + "args": { + "External id": 463361,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267452833843.625, "dur": 605.061, + "args": { + "External id": 463362,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[1134596096], [141824512], [], [], []], "Ev Idx": 16454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452833894.534, "dur": 543.809, + "args": { + "External id": 463363,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1134596096, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[141824512], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16455, "In msg nelems": 141824512 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267452833911.806, "dur": 515.250, + "args": { + "External id": 463364,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[141824512]], "Ev Idx": 16456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452834510.738, "dur": 4.610, + "args": { + "External id": 463365,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16457, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2338707, "tid": 2338707, + "ts": 6267452834756.503, "dur": 11788.528, + "args": { + "External id": 463366,"Record function id": 0, "Ev Idx": 16458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835177.676, "dur": 10.820, + "args": { + "External id": 463367,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 16459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835196.216, "dur": 2.016, + "args": { + "External id": 463368,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 16460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835201.588, "dur": 1.855, + "args": { + "External id": 463369,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835207.621, "dur": 1.955, + "args": { + "External id": 463370,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835212.079, "dur": 2.215, + "args": { + "External id": 463371,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835217.751, "dur": 1.699, + "args": { + "External id": 463372,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835225.783, "dur": 1.585, + "args": { + "External id": 463373,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835230.339, "dur": 7.419, + "args": { + "External id": 463374,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835240.173, "dur": 1.241, + "args": { + "External id": 463375,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835243.961, "dur": 1.925, + "args": { + "External id": 463376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835251.832, "dur": 1.624, + "args": { + "External id": 463377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835256.738, "dur": 1.258, + "args": { + "External id": 463378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835260.759, "dur": 1.437, + "args": { + "External id": 463379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835264.967, "dur": 1.456, + "args": { + "External id": 463380,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835271.773, "dur": 1.304, + "args": { + "External id": 463381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835276.543, "dur": 6.942, + "args": { + "External id": 463382,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835285.914, "dur": 1.255, + "args": { + "External id": 463383,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835290.410, "dur": 1.207, + "args": { + "External id": 463384,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835302.418, "dur": 1.193, + "args": { + "External id": 463385,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835307.195, "dur": 1.488, + "args": { + "External id": 463386,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835311.437, "dur": 1.682, + "args": { + "External id": 463387,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835316.593, "dur": 1.310, + "args": { + "External id": 463388,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835323.149, "dur": 1.402, + "args": { + "External id": 463389,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835328.126, "dur": 7.972, + "args": { + "External id": 463390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835340.366, "dur": 2.997, + "args": { + "External id": 463391,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835348.192, "dur": 1.568, + "args": { + "External id": 463392,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835355.224, "dur": 1.322, + "args": { + "External id": 463393,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835359.741, "dur": 1.232, + "args": { + "External id": 463394,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835363.414, "dur": 1.486, + "args": { + "External id": 463395,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835367.643, "dur": 1.376, + "args": { + "External id": 463396,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835375.673, "dur": 1.444, + "args": { + "External id": 463397,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835381.416, "dur": 7.881, + "args": { + "External id": 463398,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835391.936, "dur": 1.564, + "args": { + "External id": 463399,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835396.640, "dur": 2.246, + "args": { + "External id": 463400,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835405.052, "dur": 1.449, + "args": { + "External id": 463401,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835409.769, "dur": 0.960, + "args": { + "External id": 463402,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835413.124, "dur": 1.413, + "args": { + "External id": 463403,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835417.517, "dur": 1.429, + "args": { + "External id": 463404,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835424.577, "dur": 1.773, + "args": { + "External id": 463405,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452835429.550, "dur": 8.070, + "args": { + "External id": 463406,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 16498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452835491.902, "dur": 10936.246, + "args": { + "External id": 463407,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 16499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452835533.960, "dur": 10870.218, + "args": { + "External id": 463408,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 16500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452835566.235, "dur": 61.443, + "args": { + "External id": 463409,"Record function id": 0, "Concrete Inputs": ["[4384]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267452835640.918, "dur": 10665.488, + "args": { + "External id": 463410,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], [], []], "Ev Idx": 16502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452835644.448, "dur": 10660.307, + "args": { + "External id": 463411,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], []], "Ev Idx": 16503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452835655.310, "dur": 11.700, + "args": { + "External id": 463412,"Record function id": 0, "Concrete Inputs": ["[4384]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452835672.265, "dur": 10623.247, + "args": { + "External id": 463413,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4384], [4384], []], "Ev Idx": 16505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267452847101.012, "dur": 52.051, + "args": { + "External id": 463414,"Record function id": 0, "Ev Idx": 16506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2338707, "tid": 2338707, + "ts": 6267452847156.866, "dur": 303.334, + "args": { + "External id": 463415,"Record function id": 0, "Ev Idx": 16507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267452847226.588, "dur": 221.738, + "args": { + "External id": 463416,"Sequence number": 5036595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32000, 4096], [8, 4096]], "Ev Idx": 16508 + } + }, + { + "ph": "s", "id": 448, "pid": 2338707, "tid": 2338707, "ts": 6267452847226.588, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2338707, "tid": 2338707, + "ts": 6267452847322.387, "dur": 71.720, + "args": { + "External id": 463417,"kernel_hash": "coj4cw2xx67czwyji56kkta4pwcipuxvxsnrrkf7vsjjhsb6mejv", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oj/coj4cw2xx67czwyji56kkta4pwcipuxvxsnrrkf7vsjjhsb6mejv.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096], [32000, 4096], [8, 4096, 4096], []], "Ev Idx": 16509 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267452847545.930, "dur": 101.473, + "args": { + "External id": 463418,"Record function id": 0, "Ev Idx": 16510 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2338707, "tid": 2338707, + "ts": 6267452847661.630, "dur": 8121.951, + "args": { + "External id": 463419,"Record function id": 0, "Ev Idx": 16511 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338707, "tid": 2338707, + "ts": 6267452847670.808, "dur": 1063.228, + "args": { + "External id": 463420,"Record function id": 0, "Ev Idx": 16512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452847761.525, "dur": 14.339, + "args": { + "External id": 463421,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452847793.224, "dur": 56.528, + "args": { + "External id": 463422,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452847803.516, "dur": 3.197, + "args": { + "External id": 463423,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452847810.482, "dur": 0.661, + "args": { + "External id": 463424,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452847813.070, "dur": 2.816, + "args": { + "External id": 463425,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452847817.112, "dur": 4.840, + "args": { + "External id": 463426,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452847823.667, "dur": 0.431, + "args": { + "External id": 463427,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452847828.596, "dur": 0.314, + "args": { + "External id": 463428,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452847830.166, "dur": 0.510, + "args": { + "External id": 463429,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452847832.128, "dur": 0.541, + "args": { + "External id": 463430,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452847836.866, "dur": 0.522, + "args": { + "External id": 463431,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452847865.032, "dur": 66.887, + "args": { + "External id": 463432,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267452847972.347, "dur": 210.561, + "args": { + "External id": 463433,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452847987.341, "dur": 7.290, + "args": { + "External id": 463434,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267452848000.875, "dur": 11.890, + "args": { + "External id": 463435,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267452848006.200, "dur": 6.079, + "args": { + "External id": 463436,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452848010.285, "dur": 0.527, + "args": { + "External id": 463437,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452848020.380, "dur": 89.458, + "args": { + "External id": 463438,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452848022.796, "dur": 3.460, + "args": { + "External id": 463439,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452848028.790, "dur": 0.505, + "args": { + "External id": 463440,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452848033.292, "dur": 0.769, + "args": { + "External id": 463441,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452848035.646, "dur": 0.523, + "args": { + "External id": 463442,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452848038.034, "dur": 3.437, + "args": { + "External id": 463443,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452848042.921, "dur": 0.466, + "args": { + "External id": 463444,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452848045.102, "dur": 0.495, + "args": { + "External id": 463445,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452848050.502, "dur": 0.411, + "args": { + "External id": 463446,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452848052.345, "dur": 47.708, + "args": { + "External id": 463447,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452848126.470, "dur": 43.567, + "args": { + "External id": 463448,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267452848249.363, "dur": 367.537, + "args": { + "External id": 463449,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452848289.500, "dur": 301.057, + "args": { + "External id": 463450,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16542, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267452848301.570, "dur": 282.266, + "args": { + "External id": 463451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452848646.148, "dur": 3.864, + "args": { + "External id": 463452,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16544, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338707, "tid": 2338707, + "ts": 6267452848758.677, "dur": 6685.631, + "args": { + "External id": 463453,"Record function id": 0, "Ev Idx": 16545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452848874.821, "dur": 7.927, + "args": { + "External id": 463454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452848886.787, "dur": 1.214, + "args": { + "External id": 463455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452848890.028, "dur": 1.059, + "args": { + "External id": 463456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452848893.021, "dur": 1.490, + "args": { + "External id": 463457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452848895.981, "dur": 1.247, + "args": { + "External id": 463458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452848901.581, "dur": 1.145, + "args": { + "External id": 463459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452848904.890, "dur": 1.502, + "args": { + "External id": 463460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452848908.029, "dur": 7.336, + "args": { + "External id": 463461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452848917.767, "dur": 1.045, + "args": { + "External id": 463462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452848923.639, "dur": 0.948, + "args": { + "External id": 463463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452848954.763, "dur": 6396.179, + "args": { + "External id": 463464,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452848973.476, "dur": 6359.031, + "args": { + "External id": 463465,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452849000.502, "dur": 17.980, + "args": { + "External id": 463466,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267452849022.942, "dur": 6236.947, + "args": { + "External id": 463467,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452849026.223, "dur": 6231.979, + "args": { + "External id": 463468,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452849032.952, "dur": 6.694, + "args": { + "External id": 463469,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452849041.782, "dur": 6207.825, + "args": { + "External id": 463470,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267452855697.920, "dur": 50.223, + "args": { + "External id": 463471,"Sequence number": 5036596, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16563 + } + }, + { + "ph": "s", "id": 447, "pid": 2338707, "tid": 2338707, "ts": 6267452855697.920, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267452855724.121, "dur": 17.037, + "args": { + "External id": 463472,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452855731.308, "dur": 9.405, + "args": { + "External id": 463473,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267452855841.156, "dur": 180.326, + "args": { + "External id": 463474,"Record function id": 0, "Ev Idx": 16566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267452856023.007, "dur": 1654.455, + "args": { + "External id": 463475,"Record function id": 0, "Ev Idx": 16567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267452856135.743, "dur": 1521.015, + "args": { + "External id": 463476,"Sequence number": 5036597, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16568 + } + }, + { + "ph": "s", "id": 446, "pid": 2338707, "tid": 2338707, "ts": 6267452856135.743, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267452856260.470, "dur": 73.687, + "args": { + "External id": 463477,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452856353.686, "dur": 136.704, + "args": { + "External id": 463478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452856505.240, "dur": 49.809, + "args": { + "External id": 463479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452856567.080, "dur": 55.577, + "args": { + "External id": 463480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267452856669.422, "dur": 36.565, + "args": { + "External id": 463481,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267452856734.691, "dur": 26.999, + "args": { + "External id": 463482,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267452856790.910, "dur": 184.580, + "args": { + "External id": 463483,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267452856864.021, "dur": 21.097, + "args": { + "External id": 463484,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452856871.734, "dur": 12.332, + "args": { + "External id": 463485,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452856889.566, "dur": 5.225, + "args": { + "External id": 463486,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452856896.239, "dur": 1.657, + "args": { + "External id": 463487,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452856901.340, "dur": 4.893, + "args": { + "External id": 463488,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452856988.656, "dur": 113.973, + "args": { + "External id": 463489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267452857164.297, "dur": 48.509, + "args": { + "External id": 463490,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452857227.945, "dur": 63.095, + "args": { + "External id": 463491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452857303.478, "dur": 43.273, + "args": { + "External id": 463492,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267452857382.246, "dur": 35.260, + "args": { + "External id": 463493,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452857428.558, "dur": 51.036, + "args": { + "External id": 463494,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267452857505.554, "dur": 25.515, + "args": { + "External id": 463495,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16587 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2338707, "tid": 2338707, + "ts": 6267452857763.986, "dur": 106.203, + "args": { + "External id": 463496,"Record function id": 0, "Ev Idx": 16588 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267452857967.054, "dur": 57.338, + "args": { + "External id": 463497,"Record function id": 0, "Ev Idx": 16589 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2338707, "tid": 2338707, + "ts": 6267452858035.287, "dur": 27836.262, + "args": { + "External id": 463498,"Record function id": 0, "Ev Idx": 16590 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338707, "tid": 2338707, + "ts": 6267452858045.055, "dur": 1289.885, + "args": { + "External id": 463499,"Record function id": 0, "Ev Idx": 16591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452858210.891, "dur": 14.511, + "args": { + "External id": 463500,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452858243.028, "dur": 52.894, + "args": { + "External id": 463501,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858250.175, "dur": 5.271, + "args": { + "External id": 463502,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858257.902, "dur": 0.620, + "args": { + "External id": 463503,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858260.391, "dur": 0.607, + "args": { + "External id": 463504,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858265.395, "dur": 0.355, + "args": { + "External id": 463505,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858267.460, "dur": 0.602, + "args": { + "External id": 463506,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858269.453, "dur": 0.622, + "args": { + "External id": 463507,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858274.122, "dur": 3.880, + "args": { + "External id": 463508,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858279.754, "dur": 0.478, + "args": { + "External id": 463509,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858281.787, "dur": 2.982, + "args": { + "External id": 463510,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452858320.143, "dur": 78.033, + "args": { + "External id": 463511,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267452858441.333, "dur": 148.757, + "args": { + "External id": 463512,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452858455.116, "dur": 7.760, + "args": { + "External id": 463513,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267452858468.799, "dur": 12.931, + "args": { + "External id": 463514,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267452858474.263, "dur": 6.975, + "args": { + "External id": 463515,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858478.840, "dur": 0.699, + "args": { + "External id": 463516,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452858489.810, "dur": 35.099, + "args": { + "External id": 463517,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858492.299, "dur": 0.541, + "args": { + "External id": 463518,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858494.419, "dur": 2.554, + "args": { + "External id": 463519,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858498.281, "dur": 0.433, + "args": { + "External id": 463520,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858500.255, "dur": 2.525, + "args": { + "External id": 463521,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858507.285, "dur": 0.380, + "args": { + "External id": 463522,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858509.262, "dur": 0.385, + "args": { + "External id": 463523,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858511.379, "dur": 0.537, + "args": { + "External id": 463524,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858515.615, "dur": 0.348, + "args": { + "External id": 463525,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452858517.336, "dur": 0.430, + "args": { + "External id": 463526,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452858540.504, "dur": 39.150, + "args": { + "External id": 463527,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267452858681.205, "dur": 519.652, + "args": { + "External id": 463528,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452858721.477, "dur": 471.921, + "args": { + "External id": 463529,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16621, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267452858733.689, "dur": 449.796, + "args": { + "External id": 463530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452859235.352, "dur": 3.512, + "args": { + "External id": 463531,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16623, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338707, "tid": 2338707, + "ts": 6267452859361.609, "dur": 26208.469, + "args": { + "External id": 463532,"Record function id": 0, "Ev Idx": 16624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452859483.918, "dur": 7.966, + "args": { + "External id": 463533,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452859495.951, "dur": 1.482, + "args": { + "External id": 463534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452859499.531, "dur": 3.869, + "args": { + "External id": 463535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452859505.516, "dur": 1.000, + "args": { + "External id": 463536,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452859507.951, "dur": 0.895, + "args": { + "External id": 463537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452859513.581, "dur": 1.552, + "args": { + "External id": 463538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452859516.705, "dur": 1.146, + "args": { + "External id": 463539,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452859520.009, "dur": 3.624, + "args": { + "External id": 463540,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452859525.230, "dur": 1.120, + "args": { + "External id": 463541,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452859531.135, "dur": 1.052, + "args": { + "External id": 463542,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452859554.211, "dur": 25922.498, + "args": { + "External id": 463543,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452859572.766, "dur": 25885.718, + "args": { + "External id": 463544,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452859619.230, "dur": 20.329, + "args": { + "External id": 463545,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267452859644.058, "dur": 25748.793, + "args": { + "External id": 463546,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452859647.302, "dur": 25744.031, + "args": { + "External id": 463547,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452859654.458, "dur": 7.011, + "args": { + "External id": 463548,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452859663.372, "dur": 25719.527, + "args": { + "External id": 463549,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267452885785.603, "dur": 52.221, + "args": { + "External id": 463550,"Sequence number": 5036598, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16642 + } + }, + { + "ph": "s", "id": 445, "pid": 2338707, "tid": 2338707, "ts": 6267452885785.603, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267452885809.367, "dur": 21.254, + "args": { + "External id": 463551,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452885820.974, "dur": 9.122, + "args": { + "External id": 463552,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267452885925.108, "dur": 110.617, + "args": { + "External id": 463553,"Record function id": 0, "Ev Idx": 16645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267452886037.687, "dur": 1504.513, + "args": { + "External id": 463554,"Record function id": 0, "Ev Idx": 16646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267452886153.797, "dur": 1370.624, + "args": { + "External id": 463555,"Sequence number": 5036599, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16647 + } + }, + { + "ph": "s", "id": 444, "pid": 2338707, "tid": 2338707, "ts": 6267452886153.797, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267452886254.510, "dur": 64.575, + "args": { + "External id": 463556,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452886337.362, "dur": 127.348, + "args": { + "External id": 463557,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452886484.898, "dur": 48.061, + "args": { + "External id": 463558,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452886541.440, "dur": 34.660, + "args": { + "External id": 463559,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267452886631.736, "dur": 35.854, + "args": { + "External id": 463560,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267452886694.210, "dur": 21.384, + "args": { + "External id": 463561,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267452886746.042, "dur": 167.922, + "args": { + "External id": 463562,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267452886811.296, "dur": 13.620, + "args": { + "External id": 463563,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452886817.826, "dur": 6.140, + "args": { + "External id": 463564,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452886827.784, "dur": 7.911, + "args": { + "External id": 463565,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452886837.697, "dur": 1.248, + "args": { + "External id": 463566,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452886844.877, "dur": 7.226, + "args": { + "External id": 463567,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452886927.009, "dur": 69.703, + "args": { + "External id": 463568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267452887034.524, "dur": 96.238, + "args": { + "External id": 463569,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452887151.972, "dur": 64.795, + "args": { + "External id": 463570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452887225.553, "dur": 42.199, + "args": { + "External id": 463571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267452887299.296, "dur": 36.201, + "args": { + "External id": 463572,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452887342.689, "dur": 46.190, + "args": { + "External id": 463573,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267452887411.275, "dur": 25.224, + "args": { + "External id": 463574,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16666 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2338707, "tid": 2338707, + "ts": 6267452887648.461, "dur": 96.667, + "args": { + "External id": 463575,"Record function id": 0, "Ev Idx": 16667 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267452887840.887, "dur": 54.435, + "args": { + "External id": 463576,"Record function id": 0, "Ev Idx": 16668 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2338707, "tid": 2338707, + "ts": 6267452887905.859, "dur": 27729.045, + "args": { + "External id": 463577,"Record function id": 0, "Ev Idx": 16669 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338707, "tid": 2338707, + "ts": 6267452887914.801, "dur": 1279.662, + "args": { + "External id": 463578,"Record function id": 0, "Ev Idx": 16670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452888011.956, "dur": 12.974, + "args": { + "External id": 463579,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452888040.914, "dur": 109.659, + "args": { + "External id": 463580,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888047.065, "dur": 2.739, + "args": { + "External id": 463581,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888104.416, "dur": 3.126, + "args": { + "External id": 463582,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888112.121, "dur": 0.537, + "args": { + "External id": 463583,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888114.883, "dur": 0.987, + "args": { + "External id": 463584,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888120.252, "dur": 1.003, + "args": { + "External id": 463585,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888123.457, "dur": 0.823, + "args": { + "External id": 463586,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888125.747, "dur": 7.088, + "args": { + "External id": 463587,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888135.114, "dur": 0.864, + "args": { + "External id": 463588,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888137.667, "dur": 0.934, + "args": { + "External id": 463589,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452888166.528, "dur": 72.479, + "args": { + "External id": 463590,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267452888284.234, "dur": 162.189, + "args": { + "External id": 463591,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452888299.336, "dur": 7.600, + "args": { + "External id": 463592,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267452888313.227, "dur": 12.783, + "args": { + "External id": 463593,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267452888318.744, "dur": 6.809, + "args": { + "External id": 463594,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888323.042, "dur": 0.881, + "args": { + "External id": 463595,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452888335.056, "dur": 40.755, + "args": { + "External id": 463596,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888337.679, "dur": 3.648, + "args": { + "External id": 463597,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888342.807, "dur": 0.472, + "args": { + "External id": 463598,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888345.216, "dur": 0.520, + "args": { + "External id": 463599,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888350.083, "dur": 3.103, + "args": { + "External id": 463600,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888354.754, "dur": 0.567, + "args": { + "External id": 463601,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888357.067, "dur": 0.465, + "args": { + "External id": 463602,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888361.168, "dur": 0.734, + "args": { + "External id": 463603,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888364.001, "dur": 0.598, + "args": { + "External id": 463604,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452888366.227, "dur": 2.737, + "args": { + "External id": 463605,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452888395.103, "dur": 39.908, + "args": { + "External id": 463606,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267452888515.633, "dur": 490.557, + "args": { + "External id": 463607,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452888555.202, "dur": 444.160, + "args": { + "External id": 463608,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16700, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267452888569.790, "dur": 421.704, + "args": { + "External id": 463609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452889038.881, "dur": 2.952, + "args": { + "External id": 463610,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16702, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338707, "tid": 2338707, + "ts": 6267452889222.097, "dur": 26110.648, + "args": { + "External id": 463611,"Record function id": 0, "Ev Idx": 16703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452889345.115, "dur": 8.435, + "args": { + "External id": 463612,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452889357.878, "dur": 1.467, + "args": { + "External id": 463613,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452889361.573, "dur": 3.697, + "args": { + "External id": 463614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452889367.608, "dur": 1.489, + "args": { + "External id": 463615,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452889370.882, "dur": 1.256, + "args": { + "External id": 463616,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452889373.576, "dur": 1.255, + "args": { + "External id": 463617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452889378.859, "dur": 1.125, + "args": { + "External id": 463618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452889381.582, "dur": 4.278, + "args": { + "External id": 463619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452889388.021, "dur": 1.107, + "args": { + "External id": 463620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452889390.694, "dur": 1.157, + "args": { + "External id": 463621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452889415.996, "dur": 25831.729, + "args": { + "External id": 463622,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452889435.311, "dur": 25796.732, + "args": { + "External id": 463623,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452889459.211, "dur": 19.268, + "args": { + "External id": 463624,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267452889482.418, "dur": 25686.855, + "args": { + "External id": 463625,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452889485.615, "dur": 25682.366, + "args": { + "External id": 463626,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452889493.374, "dur": 6.268, + "args": { + "External id": 463627,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452889501.853, "dur": 25659.029, + "args": { + "External id": 463628,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267452915528.077, "dur": 53.469, + "args": { + "External id": 463629,"Sequence number": 5036600, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16721 + } + }, + { + "ph": "s", "id": 443, "pid": 2338707, "tid": 2338707, "ts": 6267452915528.077, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267452915558.365, "dur": 16.485, + "args": { + "External id": 463630,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452915566.056, "dur": 8.507, + "args": { + "External id": 463631,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267452915689.123, "dur": 90.010, + "args": { + "External id": 463632,"Record function id": 0, "Ev Idx": 16724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267452915781.293, "dur": 1525.027, + "args": { + "External id": 463633,"Record function id": 0, "Ev Idx": 16725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267452915829.996, "dur": 1457.402, + "args": { + "External id": 463634,"Sequence number": 5036601, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16726 + } + }, + { + "ph": "s", "id": 442, "pid": 2338707, "tid": 2338707, "ts": 6267452915829.996, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267452915922.132, "dur": 71.888, + "args": { + "External id": 463635,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452916014.697, "dur": 201.091, + "args": { + "External id": 463636,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452916242.013, "dur": 53.828, + "args": { + "External id": 463637,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452916307.538, "dur": 34.790, + "args": { + "External id": 463638,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267452916381.307, "dur": 36.514, + "args": { + "External id": 463639,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267452916443.535, "dur": 22.774, + "args": { + "External id": 463640,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267452916496.303, "dur": 193.640, + "args": { + "External id": 463641,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267452916558.887, "dur": 15.040, + "args": { + "External id": 463642,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452916566.145, "dur": 6.770, + "args": { + "External id": 463643,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452916577.386, "dur": 5.403, + "args": { + "External id": 463644,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452916584.682, "dur": 1.173, + "args": { + "External id": 463645,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452916589.155, "dur": 29.634, + "args": { + "External id": 463646,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452916704.366, "dur": 69.445, + "args": { + "External id": 463647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267452916815.159, "dur": 34.765, + "args": { + "External id": 463648,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452916861.986, "dur": 52.779, + "args": { + "External id": 463649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452916925.340, "dur": 40.827, + "args": { + "External id": 463650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267452916993.803, "dur": 32.877, + "args": { + "External id": 463651,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452917036.145, "dur": 104.164, + "args": { + "External id": 463652,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267452917174.586, "dur": 26.871, + "args": { + "External id": 463653,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16745 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2338707, "tid": 2338707, + "ts": 6267452917385.966, "dur": 91.721, + "args": { + "External id": 463654,"Record function id": 0, "Ev Idx": 16746 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267452917567.619, "dur": 80.483, + "args": { + "External id": 463655,"Record function id": 0, "Ev Idx": 16747 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2338707, "tid": 2338707, + "ts": 6267452917661.418, "dur": 27724.496, + "args": { + "External id": 463656,"Record function id": 0, "Ev Idx": 16748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338707, "tid": 2338707, + "ts": 6267452917671.984, "dur": 1284.836, + "args": { + "External id": 463657,"Record function id": 0, "Ev Idx": 16749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452917770.198, "dur": 13.554, + "args": { + "External id": 463658,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452917799.552, "dur": 50.953, + "args": { + "External id": 463659,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452917805.996, "dur": 2.926, + "args": { + "External id": 463660,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452917814.567, "dur": 0.480, + "args": { + "External id": 463661,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452917816.916, "dur": 0.589, + "args": { + "External id": 463662,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452917819.386, "dur": 0.457, + "args": { + "External id": 463663,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452917823.220, "dur": 0.600, + "args": { + "External id": 463664,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452917825.284, "dur": 0.688, + "args": { + "External id": 463665,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452917827.841, "dur": 6.048, + "args": { + "External id": 463666,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452917835.678, "dur": 0.532, + "args": { + "External id": 463667,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452917838.184, "dur": 0.468, + "args": { + "External id": 463668,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452917864.606, "dur": 67.924, + "args": { + "External id": 463669,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267452917972.230, "dur": 250.668, + "args": { + "External id": 463670,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452917985.347, "dur": 4.874, + "args": { + "External id": 463671,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267452917996.271, "dur": 12.796, + "args": { + "External id": 463672,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267452918001.756, "dur": 6.846, + "args": { + "External id": 463673,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452918006.176, "dur": 0.713, + "args": { + "External id": 463674,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452918016.306, "dur": 119.167, + "args": { + "External id": 463675,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452918018.640, "dur": 3.053, + "args": { + "External id": 463676,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452918024.173, "dur": 0.589, + "args": { + "External id": 463677,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452918026.373, "dur": 0.475, + "args": { + "External id": 463678,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452918030.999, "dur": 3.141, + "args": { + "External id": 463679,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452918035.729, "dur": 0.408, + "args": { + "External id": 463680,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452918037.784, "dur": 0.362, + "args": { + "External id": 463681,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452918042.226, "dur": 0.354, + "args": { + "External id": 463682,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452918044.293, "dur": 0.426, + "args": { + "External id": 463683,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452918046.403, "dur": 3.046, + "args": { + "External id": 463684,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452918165.792, "dur": 45.196, + "args": { + "External id": 463685,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267452918295.098, "dur": 531.249, + "args": { + "External id": 463686,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452918333.907, "dur": 485.515, + "args": { + "External id": 463687,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16779, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267452918346.542, "dur": 464.753, + "args": { + "External id": 463688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452918859.043, "dur": 2.939, + "args": { + "External id": 463689,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16781, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338707, "tid": 2338707, + "ts": 6267452918983.941, "dur": 26129.218, + "args": { + "External id": 463690,"Record function id": 0, "Ev Idx": 16782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452919173.536, "dur": 8.724, + "args": { + "External id": 463691,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452919187.574, "dur": 1.185, + "args": { + "External id": 463692,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452919190.938, "dur": 3.677, + "args": { + "External id": 463693,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452919197.090, "dur": 1.144, + "args": { + "External id": 463694,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452919199.729, "dur": 1.047, + "args": { + "External id": 463695,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452919202.542, "dur": 1.181, + "args": { + "External id": 463696,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452919208.299, "dur": 1.458, + "args": { + "External id": 463697,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452919212.147, "dur": 3.964, + "args": { + "External id": 463698,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452919217.362, "dur": 0.873, + "args": { + "External id": 463699,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452919220.105, "dur": 1.015, + "args": { + "External id": 463700,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452919247.626, "dur": 25770.323, + "args": { + "External id": 463701,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452919267.985, "dur": 25738.385, + "args": { + "External id": 463702,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452919294.278, "dur": 20.035, + "args": { + "External id": 463703,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267452919318.574, "dur": 25639.446, + "args": { + "External id": 463704,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452919321.806, "dur": 25634.348, + "args": { + "External id": 463705,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452919329.472, "dur": 9.181, + "args": { + "External id": 463706,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452919340.656, "dur": 25611.801, + "args": { + "External id": 463707,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267452945304.792, "dur": 46.482, + "args": { + "External id": 463708,"Sequence number": 5036602, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16800 + } + }, + { + "ph": "s", "id": 441, "pid": 2338707, "tid": 2338707, "ts": 6267452945304.792, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267452945331.069, "dur": 13.225, + "args": { + "External id": 463709,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452945336.599, "dur": 7.390, + "args": { + "External id": 463710,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267452945446.274, "dur": 90.983, + "args": { + "External id": 463711,"Record function id": 0, "Ev Idx": 16803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267452945540.292, "dur": 1408.535, + "args": { + "External id": 463712,"Record function id": 0, "Ev Idx": 16804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267452945589.729, "dur": 1342.323, + "args": { + "External id": 463713,"Sequence number": 5036603, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16805 + } + }, + { + "ph": "s", "id": 440, "pid": 2338707, "tid": 2338707, "ts": 6267452945589.729, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267452945700.464, "dur": 67.288, + "args": { + "External id": 463714,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452945788.672, "dur": 123.260, + "args": { + "External id": 463715,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452945928.149, "dur": 46.244, + "args": { + "External id": 463716,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452945985.338, "dur": 35.316, + "args": { + "External id": 463717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267452946094.375, "dur": 39.593, + "args": { + "External id": 463718,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267452946164.956, "dur": 19.486, + "args": { + "External id": 463719,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267452946213.651, "dur": 161.752, + "args": { + "External id": 463720,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267452946274.828, "dur": 15.638, + "args": { + "External id": 463721,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452946281.950, "dur": 7.430, + "args": { + "External id": 463722,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452946295.051, "dur": 5.319, + "args": { + "External id": 463723,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452946302.054, "dur": 1.113, + "args": { + "External id": 463724,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452946305.885, "dur": 6.401, + "args": { + "External id": 463725,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452946389.243, "dur": 69.791, + "args": { + "External id": 463726,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267452946502.599, "dur": 32.692, + "args": { + "External id": 463727,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452946547.390, "dur": 73.643, + "args": { + "External id": 463728,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452946640.334, "dur": 48.018, + "args": { + "External id": 463729,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267452946717.608, "dur": 31.948, + "args": { + "External id": 463730,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452946758.884, "dur": 46.023, + "args": { + "External id": 463731,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267452946828.835, "dur": 24.358, + "args": { + "External id": 463732,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16824 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2338707, "tid": 2338707, + "ts": 6267452947025.686, "dur": 137.872, + "args": { + "External id": 463733,"Record function id": 0, "Ev Idx": 16825 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267452947259.376, "dur": 58.323, + "args": { + "External id": 463734,"Record function id": 0, "Ev Idx": 16826 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2338707, "tid": 2338707, + "ts": 6267452947328.927, "dur": 29905.043, + "args": { + "External id": 463735,"Record function id": 0, "Ev Idx": 16827 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338707, "tid": 2338707, + "ts": 6267452947338.963, "dur": 1165.315, + "args": { + "External id": 463736,"Record function id": 0, "Ev Idx": 16828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452947434.857, "dur": 12.649, + "args": { + "External id": 463737,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452947463.873, "dur": 50.119, + "args": { + "External id": 463738,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947471.024, "dur": 2.707, + "args": { + "External id": 463739,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947479.133, "dur": 0.596, + "args": { + "External id": 463740,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947481.173, "dur": 0.518, + "args": { + "External id": 463741,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947483.369, "dur": 0.565, + "args": { + "External id": 463742,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947488.370, "dur": 0.508, + "args": { + "External id": 463743,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947490.393, "dur": 0.564, + "args": { + "External id": 463744,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947492.917, "dur": 5.903, + "args": { + "External id": 463745,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947500.810, "dur": 0.559, + "args": { + "External id": 463746,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947502.960, "dur": 0.560, + "args": { + "External id": 463747,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452947528.586, "dur": 87.747, + "args": { + "External id": 463748,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267452947666.230, "dur": 161.690, + "args": { + "External id": 463749,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452947681.866, "dur": 8.276, + "args": { + "External id": 463750,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267452947696.757, "dur": 13.418, + "args": { + "External id": 463751,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267452947702.264, "dur": 7.421, + "args": { + "External id": 463752,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947706.871, "dur": 0.864, + "args": { + "External id": 463753,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452947718.972, "dur": 38.453, + "args": { + "External id": 463754,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947721.297, "dur": 0.771, + "args": { + "External id": 463755,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947724.449, "dur": 3.052, + "args": { + "External id": 463756,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947729.057, "dur": 0.428, + "args": { + "External id": 463757,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947731.241, "dur": 3.033, + "args": { + "External id": 463758,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947738.104, "dur": 0.479, + "args": { + "External id": 463759,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947740.339, "dur": 0.323, + "args": { + "External id": 463760,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947742.258, "dur": 0.496, + "args": { + "External id": 463761,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947746.784, "dur": 0.665, + "args": { + "External id": 463762,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452947749.004, "dur": 0.528, + "args": { + "External id": 463763,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452947774.511, "dur": 43.660, + "args": { + "External id": 463764,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267452947890.165, "dur": 491.818, + "args": { + "External id": 463765,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452947926.960, "dur": 448.211, + "args": { + "External id": 463766,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16858, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267452947938.856, "dur": 428.356, + "args": { + "External id": 463767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452948413.799, "dur": 2.791, + "args": { + "External id": 463768,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16860, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338707, "tid": 2338707, + "ts": 6267452948529.520, "dur": 28443.976, + "args": { + "External id": 463769,"Record function id": 0, "Ev Idx": 16861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452948665.493, "dur": 8.035, + "args": { + "External id": 463770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452948678.196, "dur": 1.219, + "args": { + "External id": 463771,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452948681.710, "dur": 4.612, + "args": { + "External id": 463772,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452948688.259, "dur": 1.231, + "args": { + "External id": 463773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452948691.107, "dur": 1.012, + "args": { + "External id": 463774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452948693.526, "dur": 1.203, + "args": { + "External id": 463775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452948699.134, "dur": 1.164, + "args": { + "External id": 463776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452948702.210, "dur": 4.079, + "args": { + "External id": 463777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452948708.044, "dur": 1.255, + "args": { + "External id": 463778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452948711.548, "dur": 0.972, + "args": { + "External id": 463779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452948737.369, "dur": 28181.217, + "args": { + "External id": 463780,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452948755.689, "dur": 28152.287, + "args": { + "External id": 463781,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452948778.450, "dur": 19.394, + "args": { + "External id": 463782,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267452948801.886, "dur": 28063.014, + "args": { + "External id": 463783,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452948804.956, "dur": 28058.178, + "args": { + "External id": 463784,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452948811.970, "dur": 9.030, + "args": { + "External id": 463785,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452948823.093, "dur": 28036.241, + "args": { + "External id": 463786,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267452977164.844, "dur": 38.668, + "args": { + "External id": 463787,"Sequence number": 5036604, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16879 + } + }, + { + "ph": "s", "id": 439, "pid": 2338707, "tid": 2338707, "ts": 6267452977164.844, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267452977186.534, "dur": 11.231, + "args": { + "External id": 463788,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452977191.575, "dur": 5.698, + "args": { + "External id": 463789,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267452977280.798, "dur": 88.813, + "args": { + "External id": 463790,"Record function id": 0, "Ev Idx": 16882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267452977371.432, "dur": 1374.005, + "args": { + "External id": 463791,"Record function id": 0, "Ev Idx": 16883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267452977416.978, "dur": 1312.822, + "args": { + "External id": 463792,"Sequence number": 5036605, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16884 + } + }, + { + "ph": "s", "id": 438, "pid": 2338707, "tid": 2338707, "ts": 6267452977416.978, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267452977505.622, "dur": 61.334, + "args": { + "External id": 463793,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452977583.735, "dur": 137.101, + "args": { + "External id": 463794,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452977739.780, "dur": 48.331, + "args": { + "External id": 463795,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452977799.094, "dur": 35.622, + "args": { + "External id": 463796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267452977869.056, "dur": 34.034, + "args": { + "External id": 463797,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267452977926.781, "dur": 21.109, + "args": { + "External id": 463798,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267452977975.031, "dur": 210.693, + "args": { + "External id": 463799,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267452978035.129, "dur": 14.215, + "args": { + "External id": 463800,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452978042.244, "dur": 6.300, + "args": { + "External id": 463801,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452978093.089, "dur": 8.026, + "args": { + "External id": 463802,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452978103.192, "dur": 1.240, + "args": { + "External id": 463803,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452978107.982, "dur": 9.649, + "args": { + "External id": 463804,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452978199.720, "dur": 71.077, + "args": { + "External id": 463805,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267452978310.061, "dur": 39.094, + "args": { + "External id": 463806,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452978361.493, "dur": 53.058, + "args": { + "External id": 463807,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452978424.734, "dur": 42.171, + "args": { + "External id": 463808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267452978496.392, "dur": 32.747, + "args": { + "External id": 463809,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267452978538.705, "dur": 43.260, + "args": { + "External id": 463810,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267452978622.558, "dur": 25.645, + "args": { + "External id": 463811,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16903 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2338707, "tid": 2338707, + "ts": 6267452978821.987, "dur": 92.925, + "args": { + "External id": 463812,"Record function id": 0, "Ev Idx": 16904 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267452979004.600, "dur": 102.360, + "args": { + "External id": 463813,"Record function id": 0, "Ev Idx": 16905 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2338707, "tid": 2338707, + "ts": 6267452979120.473, "dur": 33006.074, + "args": { + "External id": 463814,"Record function id": 0, "Ev Idx": 16906 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338707, "tid": 2338707, + "ts": 6267452979130.232, "dur": 1118.630, + "args": { + "External id": 463815,"Record function id": 0, "Ev Idx": 16907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452979225.271, "dur": 11.690, + "args": { + "External id": 463816,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452979252.734, "dur": 56.151, + "args": { + "External id": 463817,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979259.950, "dur": 2.857, + "args": { + "External id": 463818,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979267.955, "dur": 0.690, + "args": { + "External id": 463819,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979270.298, "dur": 0.530, + "args": { + "External id": 463820,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979272.512, "dur": 0.542, + "args": { + "External id": 463821,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979278.189, "dur": 0.521, + "args": { + "External id": 463822,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979280.052, "dur": 0.620, + "args": { + "External id": 463823,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979282.845, "dur": 5.427, + "args": { + "External id": 463824,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979289.889, "dur": 0.550, + "args": { + "External id": 463825,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979297.829, "dur": 0.412, + "args": { + "External id": 463826,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452979320.961, "dur": 66.173, + "args": { + "External id": 463827,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267452979427.444, "dur": 147.589, + "args": { + "External id": 463828,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452979440.841, "dur": 4.784, + "args": { + "External id": 463829,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267452979451.546, "dur": 17.286, + "args": { + "External id": 463830,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267452979459.657, "dur": 8.699, + "args": { + "External id": 463831,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979463.805, "dur": 2.885, + "args": { + "External id": 463832,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267452979476.905, "dur": 36.381, + "args": { + "External id": 463833,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979479.091, "dur": 0.644, + "args": { + "External id": 463834,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979481.068, "dur": 0.737, + "args": { + "External id": 463835,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979483.528, "dur": 0.727, + "args": { + "External id": 463836,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979487.715, "dur": 3.307, + "args": { + "External id": 463837,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979492.808, "dur": 0.689, + "args": { + "External id": 463838,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979494.476, "dur": 2.665, + "args": { + "External id": 463839,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979498.684, "dur": 0.424, + "args": { + "External id": 463840,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979500.450, "dur": 0.340, + "args": { + "External id": 463841,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452979505.550, "dur": 0.410, + "args": { + "External id": 463842,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452979527.851, "dur": 36.944, + "args": { + "External id": 463843,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267452979660.060, "dur": 466.806, + "args": { + "External id": 463844,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452979697.513, "dur": 422.565, + "args": { + "External id": 463845,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16937, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267452979710.515, "dur": 402.079, + "args": { + "External id": 463846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267452980157.789, "dur": 3.190, + "args": { + "External id": 463847,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16939, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338707, "tid": 2338707, + "ts": 6267452980275.357, "dur": 31565.625, + "args": { + "External id": 463848,"Record function id": 0, "Ev Idx": 16940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452980391.576, "dur": 7.936, + "args": { + "External id": 463849,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452980403.524, "dur": 1.483, + "args": { + "External id": 463850,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452980406.974, "dur": 3.910, + "args": { + "External id": 463851,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452980412.961, "dur": 1.100, + "args": { + "External id": 463852,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452980415.618, "dur": 1.339, + "args": { + "External id": 463853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452980421.242, "dur": 1.058, + "args": { + "External id": 463854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452980423.997, "dur": 1.053, + "args": { + "External id": 463855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452980427.431, "dur": 3.900, + "args": { + "External id": 463856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452980433.038, "dur": 1.143, + "args": { + "External id": 463857,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267452980439.218, "dur": 1.315, + "args": { + "External id": 463858,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452980461.178, "dur": 31325.042, + "args": { + "External id": 463859,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452980478.958, "dur": 31296.755, + "args": { + "External id": 463860,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267452980496.846, "dur": 19.924, + "args": { + "External id": 463861,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267452980521.173, "dur": 31205.410, + "args": { + "External id": 463862,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267452980524.067, "dur": 31200.757, + "args": { + "External id": 463863,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267452980534.213, "dur": 8.756, + "args": { + "External id": 463864,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267452980544.778, "dur": 31176.065, + "args": { + "External id": 463865,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453012011.493, "dur": 77.395, + "args": { + "External id": 463866,"Sequence number": 5036606, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16958 + } + }, + { + "ph": "s", "id": 437, "pid": 2338707, "tid": 2338707, "ts": 6267453012011.493, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453012037.589, "dur": 12.118, + "args": { + "External id": 463867,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453012043.034, "dur": 6.419, + "args": { + "External id": 463868,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453012179.913, "dur": 89.229, + "args": { + "External id": 463869,"Record function id": 0, "Ev Idx": 16961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453012271.162, "dur": 1420.270, + "args": { + "External id": 463870,"Record function id": 0, "Ev Idx": 16962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453012317.884, "dur": 1356.285, + "args": { + "External id": 463871,"Sequence number": 5036607, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16963 + } + }, + { + "ph": "s", "id": 436, "pid": 2338707, "tid": 2338707, "ts": 6267453012317.884, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453012402.653, "dur": 66.316, + "args": { + "External id": 463872,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453012525.514, "dur": 142.933, + "args": { + "External id": 463873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453012689.748, "dur": 48.718, + "args": { + "External id": 463874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453012749.650, "dur": 34.266, + "args": { + "External id": 463875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453012819.746, "dur": 33.183, + "args": { + "External id": 463876,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453012879.535, "dur": 22.794, + "args": { + "External id": 463877,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453012930.564, "dur": 210.681, + "args": { + "External id": 463878,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453012993.307, "dur": 15.787, + "args": { + "External id": 463879,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453013001.076, "dur": 7.124, + "args": { + "External id": 463880,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453013013.211, "dur": 4.953, + "args": { + "External id": 463881,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453013019.670, "dur": 1.182, + "args": { + "External id": 463882,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453013023.528, "dur": 9.272, + "args": { + "External id": 463883,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453013155.801, "dur": 74.176, + "args": { + "External id": 463884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453013270.278, "dur": 36.905, + "args": { + "External id": 463885,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453013319.214, "dur": 51.775, + "args": { + "External id": 463886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453013379.599, "dur": 41.106, + "args": { + "External id": 463887,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453013450.587, "dur": 29.828, + "args": { + "External id": 463888,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453013489.153, "dur": 42.040, + "args": { + "External id": 463889,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453013552.375, "dur": 19.803, + "args": { + "External id": 463890,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16982 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2338707, "tid": 2338707, + "ts": 6267453013771.729, "dur": 90.885, + "args": { + "External id": 463891,"Record function id": 0, "Ev Idx": 16983 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453013950.380, "dur": 55.747, + "args": { + "External id": 463892,"Record function id": 0, "Ev Idx": 16984 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2338707, "tid": 2338707, + "ts": 6267453014016.501, "dur": 30330.805, + "args": { + "External id": 463893,"Record function id": 0, "Ev Idx": 16985 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338707, "tid": 2338707, + "ts": 6267453014025.721, "dur": 1150.798, + "args": { + "External id": 463894,"Record function id": 0, "Ev Idx": 16986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453014166.547, "dur": 12.231, + "args": { + "External id": 463895,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453014196.534, "dur": 48.280, + "args": { + "External id": 463896,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014203.661, "dur": 2.750, + "args": { + "External id": 463897,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014211.288, "dur": 0.466, + "args": { + "External id": 463898,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014213.421, "dur": 0.526, + "args": { + "External id": 463899,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014215.230, "dur": 0.664, + "args": { + "External id": 463900,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014219.791, "dur": 0.607, + "args": { + "External id": 463901,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014224.981, "dur": 0.631, + "args": { + "External id": 463902,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014227.029, "dur": 5.099, + "args": { + "External id": 463903,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014233.323, "dur": 0.349, + "args": { + "External id": 463904,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014235.323, "dur": 0.373, + "args": { + "External id": 463905,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453014258.261, "dur": 68.228, + "args": { + "External id": 463906,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453014365.449, "dur": 146.139, + "args": { + "External id": 463907,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453014381.100, "dur": 4.667, + "args": { + "External id": 463908,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453014392.148, "dur": 14.270, + "args": { + "External id": 463909,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453014399.604, "dur": 6.354, + "args": { + "External id": 463910,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014404.047, "dur": 0.461, + "args": { + "External id": 463911,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453014414.074, "dur": 38.679, + "args": { + "External id": 463912,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014416.685, "dur": 2.409, + "args": { + "External id": 463913,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014421.074, "dur": 0.389, + "args": { + "External id": 463914,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014422.662, "dur": 0.272, + "args": { + "External id": 463915,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014427.389, "dur": 3.679, + "args": { + "External id": 463916,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014432.369, "dur": 0.295, + "args": { + "External id": 463917,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014434.278, "dur": 0.245, + "args": { + "External id": 463918,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014438.481, "dur": 0.351, + "args": { + "External id": 463919,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014440.221, "dur": 0.349, + "args": { + "External id": 463920,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453014441.953, "dur": 2.697, + "args": { + "External id": 463921,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453014464.915, "dur": 36.541, + "args": { + "External id": 463922,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453014573.995, "dur": 441.995, + "args": { + "External id": 463923,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453014626.890, "dur": 382.361, + "args": { + "External id": 463924,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17016, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453014640.514, "dur": 362.455, + "args": { + "External id": 463925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453015043.078, "dur": 2.878, + "args": { + "External id": 463926,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17018, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338707, "tid": 2338707, + "ts": 6267453015204.115, "dur": 28892.170, + "args": { + "External id": 463927,"Record function id": 0, "Ev Idx": 17019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453015319.349, "dur": 7.835, + "args": { + "External id": 463928,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453015331.170, "dur": 1.268, + "args": { + "External id": 463929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453015334.454, "dur": 4.783, + "args": { + "External id": 463930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453015341.297, "dur": 1.062, + "args": { + "External id": 463931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453015343.977, "dur": 1.530, + "args": { + "External id": 463932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453015346.924, "dur": 1.761, + "args": { + "External id": 463933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453015353.450, "dur": 1.314, + "args": { + "External id": 463934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453015356.649, "dur": 3.747, + "args": { + "External id": 463935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453015362.046, "dur": 1.118, + "args": { + "External id": 463936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453015364.729, "dur": 0.992, + "args": { + "External id": 463937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453015388.568, "dur": 28614.017, + "args": { + "External id": 463938,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453015406.573, "dur": 28584.815, + "args": { + "External id": 463939,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453015427.696, "dur": 19.956, + "args": { + "External id": 463940,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453015451.758, "dur": 28493.746, + "args": { + "External id": 463941,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453015454.891, "dur": 28489.771, + "args": { + "External id": 463942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453015461.574, "dur": 7.034, + "args": { + "External id": 463943,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453015470.965, "dur": 28468.715, + "args": { + "External id": 463944,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453044273.837, "dur": 43.517, + "args": { + "External id": 463945,"Sequence number": 5036608, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17037 + } + }, + { + "ph": "s", "id": 435, "pid": 2338707, "tid": 2338707, "ts": 6267453044273.837, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453044299.009, "dur": 13.017, + "args": { + "External id": 463946,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453044304.706, "dur": 6.990, + "args": { + "External id": 463947,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453044399.565, "dur": 85.548, + "args": { + "External id": 463948,"Record function id": 0, "Ev Idx": 17040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453044487.653, "dur": 1371.151, + "args": { + "External id": 463949,"Record function id": 0, "Ev Idx": 17041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453044532.536, "dur": 1308.245, + "args": { + "External id": 463950,"Sequence number": 5036609, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17042 + } + }, + { + "ph": "s", "id": 434, "pid": 2338707, "tid": 2338707, "ts": 6267453044532.536, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453044631.841, "dur": 63.947, + "args": { + "External id": 463951,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453044715.712, "dur": 120.164, + "args": { + "External id": 463952,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453044855.495, "dur": 45.484, + "args": { + "External id": 463953,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453044912.089, "dur": 35.057, + "args": { + "External id": 463954,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453044978.101, "dur": 31.265, + "args": { + "External id": 463955,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453045034.782, "dur": 64.586, + "args": { + "External id": 463956,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453045134.042, "dur": 168.533, + "args": { + "External id": 463957,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453045199.393, "dur": 16.116, + "args": { + "External id": 463958,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453045206.771, "dur": 7.789, + "args": { + "External id": 463959,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453045219.762, "dur": 4.332, + "args": { + "External id": 463960,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453045225.556, "dur": 1.293, + "args": { + "External id": 463961,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453045230.157, "dur": 6.885, + "args": { + "External id": 463962,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453045315.453, "dur": 69.114, + "args": { + "External id": 463963,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453045421.754, "dur": 34.962, + "args": { + "External id": 463964,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453045470.601, "dur": 50.787, + "args": { + "External id": 463965,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453045531.832, "dur": 42.425, + "args": { + "External id": 463966,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453045618.458, "dur": 36.708, + "args": { + "External id": 463967,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453045665.569, "dur": 47.372, + "args": { + "External id": 463968,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453045736.751, "dur": 22.796, + "args": { + "External id": 463969,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17061 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2338707, "tid": 2338707, + "ts": 6267453045934.464, "dur": 97.277, + "args": { + "External id": 463970,"Record function id": 0, "Ev Idx": 17062 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453046174.388, "dur": 60.124, + "args": { + "External id": 463971,"Record function id": 0, "Ev Idx": 17063 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2338707, "tid": 2338707, + "ts": 6267453046245.326, "dur": 31653.665, + "args": { + "External id": 463972,"Record function id": 0, "Ev Idx": 17064 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338707, "tid": 2338707, + "ts": 6267453046256.501, "dur": 1126.171, + "args": { + "External id": 463973,"Record function id": 0, "Ev Idx": 17065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453046354.350, "dur": 12.664, + "args": { + "External id": 463974,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453046382.261, "dur": 47.980, + "args": { + "External id": 463975,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046389.621, "dur": 2.978, + "args": { + "External id": 463976,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046397.186, "dur": 0.583, + "args": { + "External id": 463977,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046399.646, "dur": 0.385, + "args": { + "External id": 463978,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046401.519, "dur": 0.661, + "args": { + "External id": 463979,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046406.537, "dur": 0.564, + "args": { + "External id": 463980,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046408.612, "dur": 0.488, + "args": { + "External id": 463981,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046410.897, "dur": 5.029, + "args": { + "External id": 463982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046417.432, "dur": 0.294, + "args": { + "External id": 463983,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046419.569, "dur": 0.290, + "args": { + "External id": 463984,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453046443.139, "dur": 73.087, + "args": { + "External id": 463985,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453046563.190, "dur": 176.457, + "args": { + "External id": 463986,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453046578.730, "dur": 7.033, + "args": { + "External id": 463987,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453046608.558, "dur": 18.185, + "args": { + "External id": 463988,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453046618.111, "dur": 8.166, + "args": { + "External id": 463989,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046623.093, "dur": 0.975, + "args": { + "External id": 463990,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453046637.150, "dur": 36.539, + "args": { + "External id": 463991,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046639.501, "dur": 0.667, + "args": { + "External id": 463992,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046642.006, "dur": 3.802, + "args": { + "External id": 463993,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046647.241, "dur": 0.376, + "args": { + "External id": 463994,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046649.380, "dur": 3.057, + "args": { + "External id": 463995,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046656.620, "dur": 0.345, + "args": { + "External id": 463996,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046658.506, "dur": 0.488, + "args": { + "External id": 463997,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046660.440, "dur": 0.334, + "args": { + "External id": 463998,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046664.919, "dur": 0.307, + "args": { + "External id": 463999,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453046666.140, "dur": 0.485, + "args": { + "External id": 464000,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453046688.984, "dur": 39.766, + "args": { + "External id": 464001,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453046804.128, "dur": 459.342, + "args": { + "External id": 464002,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453046841.383, "dur": 415.045, + "args": { + "External id": 464003,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17095, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453046853.451, "dur": 395.515, + "args": { + "External id": 464004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453047294.202, "dur": 2.849, + "args": { + "External id": 464005,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17097, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338707, "tid": 2338707, + "ts": 6267453047407.437, "dur": 30239.205, + "args": { + "External id": 464006,"Record function id": 0, "Ev Idx": 17098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453047519.449, "dur": 7.960, + "args": { + "External id": 464007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453047530.935, "dur": 1.440, + "args": { + "External id": 464008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453047534.336, "dur": 4.000, + "args": { + "External id": 464009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453047540.636, "dur": 1.054, + "args": { + "External id": 464010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453047543.576, "dur": 1.254, + "args": { + "External id": 464011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453047546.229, "dur": 1.473, + "args": { + "External id": 464012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453047551.589, "dur": 1.459, + "args": { + "External id": 464013,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453047554.477, "dur": 2.665, + "args": { + "External id": 464014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453047558.973, "dur": 1.154, + "args": { + "External id": 464015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453047561.516, "dur": 0.845, + "args": { + "External id": 464016,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453047585.075, "dur": 29990.993, + "args": { + "External id": 464017,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453047621.589, "dur": 29944.046, + "args": { + "External id": 464018,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453047640.549, "dur": 19.926, + "args": { + "External id": 464019,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453047664.663, "dur": 29855.904, + "args": { + "External id": 464020,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453047668.149, "dur": 29850.698, + "args": { + "External id": 464021,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453047677.603, "dur": 6.866, + "args": { + "External id": 464022,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453047686.467, "dur": 29828.896, + "args": { + "External id": 464023,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453077825.285, "dur": 42.479, + "args": { + "External id": 464024,"Sequence number": 5036610, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17116 + } + }, + { + "ph": "s", "id": 433, "pid": 2338707, "tid": 2338707, "ts": 6267453077825.285, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453077848.965, "dur": 12.231, + "args": { + "External id": 464025,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453077854.296, "dur": 6.620, + "args": { + "External id": 464026,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453077953.578, "dur": 88.293, + "args": { + "External id": 464027,"Record function id": 0, "Ev Idx": 17119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453078044.509, "dur": 1459.632, + "args": { + "External id": 464028,"Record function id": 0, "Ev Idx": 17120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453078133.598, "dur": 1353.522, + "args": { + "External id": 464029,"Sequence number": 5036611, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17121 + } + }, + { + "ph": "s", "id": 432, "pid": 2338707, "tid": 2338707, "ts": 6267453078133.598, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453078228.061, "dur": 66.557, + "args": { + "External id": 464030,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453078313.654, "dur": 122.259, + "args": { + "External id": 464031,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453078454.837, "dur": 46.565, + "args": { + "External id": 464032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453078512.909, "dur": 35.697, + "args": { + "External id": 464033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453078580.784, "dur": 52.757, + "args": { + "External id": 464034,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453078665.321, "dur": 21.872, + "args": { + "External id": 464035,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453078718.225, "dur": 182.094, + "args": { + "External id": 464036,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453078791.408, "dur": 18.037, + "args": { + "External id": 464037,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453078801.680, "dur": 6.923, + "args": { + "External id": 464038,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453078813.703, "dur": 5.020, + "args": { + "External id": 464039,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453078819.974, "dur": 1.508, + "args": { + "External id": 464040,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453078826.986, "dur": 6.769, + "args": { + "External id": 464041,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453078915.139, "dur": 71.209, + "args": { + "External id": 464042,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453079028.932, "dur": 74.308, + "args": { + "External id": 464043,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453079122.577, "dur": 58.862, + "args": { + "External id": 464044,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453079193.873, "dur": 43.893, + "args": { + "External id": 464045,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453079273.617, "dur": 35.226, + "args": { + "External id": 464046,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453079315.989, "dur": 44.991, + "args": { + "External id": 464047,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453079382.549, "dur": 26.774, + "args": { + "External id": 464048,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17140 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2338707, "tid": 2338707, + "ts": 6267453079581.597, "dur": 111.078, + "args": { + "External id": 464049,"Record function id": 0, "Ev Idx": 17141 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453079785.681, "dur": 57.997, + "args": { + "External id": 464050,"Record function id": 0, "Ev Idx": 17142 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2338707, "tid": 2338707, + "ts": 6267453079854.017, "dur": 33503.196, + "args": { + "External id": 464051,"Record function id": 0, "Ev Idx": 17143 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338707, "tid": 2338707, + "ts": 6267453079863.851, "dur": 1238.943, + "args": { + "External id": 464052,"Record function id": 0, "Ev Idx": 17144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453079957.899, "dur": 11.308, + "args": { + "External id": 464053,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453079986.754, "dur": 47.501, + "args": { + "External id": 464054,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453079993.788, "dur": 2.614, + "args": { + "External id": 464055,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080000.611, "dur": 0.708, + "args": { + "External id": 464056,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080002.908, "dur": 0.494, + "args": { + "External id": 464057,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080004.681, "dur": 0.574, + "args": { + "External id": 464058,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080009.602, "dur": 0.635, + "args": { + "External id": 464059,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080011.810, "dur": 0.648, + "args": { + "External id": 464060,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080014.513, "dur": 5.001, + "args": { + "External id": 464061,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080021.291, "dur": 0.663, + "args": { + "External id": 464062,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080024.019, "dur": 0.404, + "args": { + "External id": 464063,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453080048.394, "dur": 104.355, + "args": { + "External id": 464064,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453080198.136, "dur": 156.020, + "args": { + "External id": 464065,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453080213.109, "dur": 5.707, + "args": { + "External id": 464066,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453080228.488, "dur": 12.840, + "args": { + "External id": 464067,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453080233.889, "dur": 6.981, + "args": { + "External id": 464068,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080238.296, "dur": 0.744, + "args": { + "External id": 464069,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453080249.946, "dur": 40.931, + "args": { + "External id": 464070,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080252.322, "dur": 3.493, + "args": { + "External id": 464071,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080258.074, "dur": 0.828, + "args": { + "External id": 464072,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080260.316, "dur": 0.611, + "args": { + "External id": 464073,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080265.534, "dur": 3.190, + "args": { + "External id": 464074,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080269.819, "dur": 0.381, + "args": { + "External id": 464075,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080271.882, "dur": 0.562, + "args": { + "External id": 464076,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080275.962, "dur": 0.460, + "args": { + "External id": 464077,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080278.376, "dur": 0.315, + "args": { + "External id": 464078,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453080280.289, "dur": 3.048, + "args": { + "External id": 464079,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453080305.163, "dur": 38.838, + "args": { + "External id": 464080,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453080418.426, "dur": 521.596, + "args": { + "External id": 464081,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453080457.733, "dur": 475.170, + "args": { + "External id": 464082,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17174, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453080470.260, "dur": 455.554, + "args": { + "External id": 464083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453080972.103, "dur": 3.288, + "args": { + "External id": 464084,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17176, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338707, "tid": 2338707, + "ts": 6267453081133.229, "dur": 31970.774, + "args": { + "External id": 464085,"Record function id": 0, "Ev Idx": 17177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453081253.927, "dur": 7.794, + "args": { + "External id": 464086,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453081265.805, "dur": 1.981, + "args": { + "External id": 464087,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453081269.849, "dur": 4.802, + "args": { + "External id": 464088,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453081277.123, "dur": 1.252, + "args": { + "External id": 464089,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453081280.193, "dur": 1.406, + "args": { + "External id": 464090,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453081283.219, "dur": 0.952, + "args": { + "External id": 464091,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453081288.445, "dur": 1.205, + "args": { + "External id": 464092,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453081291.912, "dur": 3.159, + "args": { + "External id": 464093,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453081296.573, "dur": 1.286, + "args": { + "External id": 464094,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453081299.508, "dur": 0.915, + "args": { + "External id": 464095,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453081323.721, "dur": 31699.123, + "args": { + "External id": 464096,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453081342.472, "dur": 31669.646, + "args": { + "External id": 464097,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453081363.239, "dur": 19.853, + "args": { + "External id": 464098,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453081387.303, "dur": 31577.995, + "args": { + "External id": 464099,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453081390.542, "dur": 31573.062, + "args": { + "External id": 464100,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453081397.704, "dur": 6.536, + "args": { + "External id": 464101,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453081406.319, "dur": 31554.044, + "args": { + "External id": 464102,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453113276.869, "dur": 42.861, + "args": { + "External id": 464103,"Sequence number": 5036612, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17195 + } + }, + { + "ph": "s", "id": 431, "pid": 2338707, "tid": 2338707, "ts": 6267453113276.869, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453113301.498, "dur": 11.674, + "args": { + "External id": 464104,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453113306.692, "dur": 6.225, + "args": { + "External id": 464105,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453113411.998, "dur": 92.806, + "args": { + "External id": 464106,"Record function id": 0, "Ev Idx": 17198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453113506.365, "dur": 1384.482, + "args": { + "External id": 464107,"Record function id": 0, "Ev Idx": 17199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453113553.330, "dur": 1321.071, + "args": { + "External id": 464108,"Sequence number": 5036613, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17200 + } + }, + { + "ph": "s", "id": 430, "pid": 2338707, "tid": 2338707, "ts": 6267453113553.330, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453113660.646, "dur": 64.981, + "args": { + "External id": 464109,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453113743.886, "dur": 121.941, + "args": { + "External id": 464110,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453113880.397, "dur": 46.516, + "args": { + "External id": 464111,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453113941.255, "dur": 35.896, + "args": { + "External id": 464112,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453114008.284, "dur": 31.882, + "args": { + "External id": 464113,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453114108.686, "dur": 27.260, + "args": { + "External id": 464114,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453114165.845, "dur": 169.106, + "args": { + "External id": 464115,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453114226.466, "dur": 15.842, + "args": { + "External id": 464116,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453114233.658, "dur": 7.693, + "args": { + "External id": 464117,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453114246.482, "dur": 4.959, + "args": { + "External id": 464118,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453114256.226, "dur": 1.348, + "args": { + "External id": 464119,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453114260.799, "dur": 7.540, + "args": { + "External id": 464120,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453114349.680, "dur": 67.267, + "args": { + "External id": 464121,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453114455.615, "dur": 35.628, + "args": { + "External id": 464122,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453114505.714, "dur": 52.213, + "args": { + "External id": 464123,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453114567.796, "dur": 60.503, + "args": { + "External id": 464124,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453114661.905, "dur": 32.811, + "args": { + "External id": 464125,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453114704.261, "dur": 46.340, + "args": { + "External id": 464126,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453114771.261, "dur": 23.756, + "args": { + "External id": 464127,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17219 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2338707, "tid": 2338707, + "ts": 6267453114967.746, "dur": 133.153, + "args": { + "External id": 464128,"Record function id": 0, "Ev Idx": 17220 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453115197.216, "dur": 56.982, + "args": { + "External id": 464129,"Record function id": 0, "Ev Idx": 17221 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2338707, "tid": 2338707, + "ts": 6267453115264.412, "dur": 32526.948, + "args": { + "External id": 464130,"Record function id": 0, "Ev Idx": 17222 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338707, "tid": 2338707, + "ts": 6267453115273.942, "dur": 1128.303, + "args": { + "External id": 464131,"Record function id": 0, "Ev Idx": 17223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453115371.499, "dur": 12.709, + "args": { + "External id": 464132,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453115401.177, "dur": 46.044, + "args": { + "External id": 464133,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115408.043, "dur": 3.152, + "args": { + "External id": 464134,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115415.696, "dur": 0.674, + "args": { + "External id": 464135,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115418.085, "dur": 0.602, + "args": { + "External id": 464136,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115419.945, "dur": 0.441, + "args": { + "External id": 464137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115424.341, "dur": 0.762, + "args": { + "External id": 464138,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115426.676, "dur": 0.685, + "args": { + "External id": 464139,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115429.294, "dur": 5.483, + "args": { + "External id": 464140,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115436.169, "dur": 0.389, + "args": { + "External id": 464141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115438.375, "dur": 0.285, + "args": { + "External id": 464142,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453115461.082, "dur": 64.440, + "args": { + "External id": 464143,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453115568.164, "dur": 174.424, + "args": { + "External id": 464144,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453115581.101, "dur": 4.857, + "args": { + "External id": 464145,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453115609.487, "dur": 14.773, + "args": { + "External id": 464146,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453115616.212, "dur": 7.605, + "args": { + "External id": 464147,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115620.979, "dur": 0.748, + "args": { + "External id": 464148,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453115634.456, "dur": 39.692, + "args": { + "External id": 464149,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115637.162, "dur": 3.152, + "args": { + "External id": 464150,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115641.800, "dur": 0.686, + "args": { + "External id": 464151,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115644.561, "dur": 0.602, + "args": { + "External id": 464152,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115649.506, "dur": 3.091, + "args": { + "External id": 464153,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115654.140, "dur": 0.478, + "args": { + "External id": 464154,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115655.869, "dur": 0.344, + "args": { + "External id": 464155,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115660.906, "dur": 0.500, + "args": { + "External id": 464156,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115662.878, "dur": 0.375, + "args": { + "External id": 464157,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453115665.003, "dur": 2.474, + "args": { + "External id": 464158,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453115690.796, "dur": 41.760, + "args": { + "External id": 464159,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453115805.994, "dur": 475.383, + "args": { + "External id": 464160,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453115842.990, "dur": 431.119, + "args": { + "External id": 464161,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17253, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453115855.171, "dur": 407.742, + "args": { + "External id": 464162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453116313.823, "dur": 2.513, + "args": { + "External id": 464163,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17255, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338707, "tid": 2338707, + "ts": 6267453116428.082, "dur": 31091.503, + "args": { + "External id": 464164,"Record function id": 0, "Ev Idx": 17256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453116544.599, "dur": 7.789, + "args": { + "External id": 464165,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453116556.356, "dur": 1.353, + "args": { + "External id": 464166,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453116559.639, "dur": 4.518, + "args": { + "External id": 464167,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453116566.539, "dur": 1.267, + "args": { + "External id": 464168,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453116569.358, "dur": 1.034, + "args": { + "External id": 464169,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453116571.920, "dur": 1.693, + "args": { + "External id": 464170,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453116577.891, "dur": 1.386, + "args": { + "External id": 464171,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453116581.226, "dur": 3.317, + "args": { + "External id": 464172,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453116586.233, "dur": 1.235, + "args": { + "External id": 464173,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453116589.392, "dur": 1.194, + "args": { + "External id": 464174,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453116634.142, "dur": 30816.307, + "args": { + "External id": 464175,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453116655.011, "dur": 30782.893, + "args": { + "External id": 464176,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453116673.793, "dur": 18.360, + "args": { + "External id": 464177,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453116696.489, "dur": 30687.940, + "args": { + "External id": 464178,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453116699.592, "dur": 30684.033, + "args": { + "External id": 464179,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453116709.632, "dur": 6.971, + "args": { + "External id": 464180,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453116718.820, "dur": 30659.110, + "args": { + "External id": 464181,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453147714.838, "dur": 44.325, + "args": { + "External id": 464182,"Sequence number": 5036614, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17274 + } + }, + { + "ph": "s", "id": 429, "pid": 2338707, "tid": 2338707, "ts": 6267453147714.838, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453147737.444, "dur": 14.790, + "args": { + "External id": 464183,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453147743.860, "dur": 7.988, + "args": { + "External id": 464184,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453147841.877, "dur": 90.640, + "args": { + "External id": 464185,"Record function id": 0, "Ev Idx": 17277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453147935.263, "dur": 1403.519, + "args": { + "External id": 464186,"Record function id": 0, "Ev Idx": 17278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453147981.624, "dur": 1340.204, + "args": { + "External id": 464187,"Sequence number": 5036615, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17279 + } + }, + { + "ph": "s", "id": 428, "pid": 2338707, "tid": 2338707, "ts": 6267453147981.624, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453148102.538, "dur": 67.801, + "args": { + "External id": 464188,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453148190.831, "dur": 119.740, + "args": { + "External id": 464189,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453148327.748, "dur": 45.676, + "args": { + "External id": 464190,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453148388.252, "dur": 35.637, + "args": { + "External id": 464191,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453148455.348, "dur": 34.825, + "args": { + "External id": 464192,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453148517.344, "dur": 21.075, + "args": { + "External id": 464193,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453148570.866, "dur": 181.892, + "args": { + "External id": 464194,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453148650.471, "dur": 15.626, + "args": { + "External id": 464195,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453148657.663, "dur": 7.116, + "args": { + "External id": 464196,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453148669.771, "dur": 5.620, + "args": { + "External id": 464197,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453148677.324, "dur": 1.291, + "args": { + "External id": 464198,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453148681.538, "dur": 6.202, + "args": { + "External id": 464199,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453148768.826, "dur": 67.670, + "args": { + "External id": 464200,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453148876.963, "dur": 33.875, + "args": { + "External id": 464201,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453148924.401, "dur": 50.732, + "args": { + "External id": 464202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453148986.597, "dur": 40.199, + "args": { + "External id": 464203,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453149094.815, "dur": 36.707, + "args": { + "External id": 464204,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453149141.140, "dur": 50.862, + "args": { + "External id": 464205,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453149217.853, "dur": 24.043, + "args": { + "External id": 464206,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17298 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2338707, "tid": 2338707, + "ts": 6267453149413.784, "dur": 93.984, + "args": { + "External id": 464207,"Record function id": 0, "Ev Idx": 17299 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453149615.371, "dur": 59.688, + "args": { + "External id": 464208,"Record function id": 0, "Ev Idx": 17300 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2338707, "tid": 2338707, + "ts": 6267453149685.793, "dur": 30904.577, + "args": { + "External id": 464209,"Record function id": 0, "Ev Idx": 17301 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338707, "tid": 2338707, + "ts": 6267453149695.712, "dur": 1113.536, + "args": { + "External id": 464210,"Record function id": 0, "Ev Idx": 17302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453149788.941, "dur": 11.613, + "args": { + "External id": 464211,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453149817.476, "dur": 46.336, + "args": { + "External id": 464212,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453149824.394, "dur": 2.564, + "args": { + "External id": 464213,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453149832.376, "dur": 0.520, + "args": { + "External id": 464214,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453149834.426, "dur": 0.682, + "args": { + "External id": 464215,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453149836.478, "dur": 0.657, + "args": { + "External id": 464216,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453149842.088, "dur": 0.471, + "args": { + "External id": 464217,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453149843.908, "dur": 0.891, + "args": { + "External id": 464218,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453149846.934, "dur": 4.451, + "args": { + "External id": 464219,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453149852.684, "dur": 0.362, + "args": { + "External id": 464220,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453149855.108, "dur": 0.467, + "args": { + "External id": 464221,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453149876.719, "dur": 62.335, + "args": { + "External id": 464222,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453149977.024, "dur": 202.699, + "args": { + "External id": 464223,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453149989.656, "dur": 4.522, + "args": { + "External id": 464224,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453150003.135, "dur": 11.893, + "args": { + "External id": 464225,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453150008.340, "dur": 6.207, + "args": { + "External id": 464226,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453150012.595, "dur": 0.544, + "args": { + "External id": 464227,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453150022.162, "dur": 78.576, + "args": { + "External id": 464228,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453150024.468, "dur": 3.360, + "args": { + "External id": 464229,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453150029.354, "dur": 0.561, + "args": { + "External id": 464230,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453150031.475, "dur": 0.590, + "args": { + "External id": 464231,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453150036.235, "dur": 2.841, + "args": { + "External id": 464232,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453150040.404, "dur": 0.518, + "args": { + "External id": 464233,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453150042.523, "dur": 0.508, + "args": { + "External id": 464234,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453150046.733, "dur": 0.431, + "args": { + "External id": 464235,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453150048.658, "dur": 0.547, + "args": { + "External id": 464236,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453150050.719, "dur": 40.899, + "args": { + "External id": 464237,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453150115.934, "dur": 52.900, + "args": { + "External id": 464238,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453150244.919, "dur": 447.050, + "args": { + "External id": 464239,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453150279.544, "dur": 406.173, + "args": { + "External id": 464240,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17332, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453150291.836, "dur": 386.726, + "args": { + "External id": 464241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453150720.999, "dur": 3.154, + "args": { + "External id": 464242,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17334, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338707, "tid": 2338707, + "ts": 6267453150835.539, "dur": 29507.858, + "args": { + "External id": 464243,"Record function id": 0, "Ev Idx": 17335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453150948.571, "dur": 7.521, + "args": { + "External id": 464244,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453150959.751, "dur": 1.240, + "args": { + "External id": 464245,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453150963.006, "dur": 4.306, + "args": { + "External id": 464246,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453150969.318, "dur": 1.326, + "args": { + "External id": 464247,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453150972.105, "dur": 1.115, + "args": { + "External id": 464248,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453150976.971, "dur": 1.040, + "args": { + "External id": 464249,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453150979.723, "dur": 0.993, + "args": { + "External id": 464250,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453150982.620, "dur": 2.360, + "args": { + "External id": 464251,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453150986.444, "dur": 0.899, + "args": { + "External id": 464252,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453150991.683, "dur": 0.694, + "args": { + "External id": 464253,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453151012.114, "dur": 29277.961, + "args": { + "External id": 464254,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453151028.877, "dur": 29251.226, + "args": { + "External id": 464255,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453151049.620, "dur": 59.151, + "args": { + "External id": 464256,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453151115.529, "dur": 29119.473, + "args": { + "External id": 464257,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453151118.629, "dur": 29114.691, + "args": { + "External id": 464258,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453151130.396, "dur": 6.935, + "args": { + "External id": 464259,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453151139.431, "dur": 29090.489, + "args": { + "External id": 464260,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453180507.296, "dur": 46.133, + "args": { + "External id": 464261,"Sequence number": 5036616, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17353 + } + }, + { + "ph": "s", "id": 427, "pid": 2338707, "tid": 2338707, "ts": 6267453180507.296, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453180534.160, "dur": 12.448, + "args": { + "External id": 464262,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453180539.371, "dur": 6.916, + "args": { + "External id": 464263,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453180659.091, "dur": 89.511, + "args": { + "External id": 464264,"Record function id": 0, "Ev Idx": 17356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453180750.492, "dur": 1389.734, + "args": { + "External id": 464265,"Record function id": 0, "Ev Idx": 17357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453180793.662, "dur": 1327.615, + "args": { + "External id": 464266,"Sequence number": 5036617, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17358 + } + }, + { + "ph": "s", "id": 426, "pid": 2338707, "tid": 2338707, "ts": 6267453180793.662, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453180878.652, "dur": 60.402, + "args": { + "External id": 464267,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453180953.885, "dur": 157.814, + "args": { + "External id": 464268,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453181133.884, "dur": 54.131, + "args": { + "External id": 464269,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453181204.221, "dur": 38.458, + "args": { + "External id": 464270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453181276.664, "dur": 35.483, + "args": { + "External id": 464271,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453181336.825, "dur": 22.104, + "args": { + "External id": 464272,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453181387.633, "dur": 162.922, + "args": { + "External id": 464273,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453181449.291, "dur": 15.067, + "args": { + "External id": 464274,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453181456.771, "dur": 6.546, + "args": { + "External id": 464275,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453181468.395, "dur": 5.261, + "args": { + "External id": 464276,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453181474.983, "dur": 1.457, + "args": { + "External id": 464277,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453181479.628, "dur": 5.890, + "args": { + "External id": 464278,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453181564.820, "dur": 78.593, + "args": { + "External id": 464279,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453181685.488, "dur": 37.233, + "args": { + "External id": 464280,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453181735.912, "dur": 54.374, + "args": { + "External id": 464281,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453181803.813, "dur": 42.129, + "args": { + "External id": 464282,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453181872.606, "dur": 31.997, + "args": { + "External id": 464283,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453181913.616, "dur": 42.558, + "args": { + "External id": 464284,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453181976.366, "dur": 24.253, + "args": { + "External id": 464285,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17377 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2338707, "tid": 2338707, + "ts": 6267453182217.272, "dur": 94.708, + "args": { + "External id": 464286,"Record function id": 0, "Ev Idx": 17378 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453182405.321, "dur": 57.535, + "args": { + "External id": 464287,"Record function id": 0, "Ev Idx": 17379 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2338707, "tid": 2338707, + "ts": 6267453182473.901, "dur": 32622.337, + "args": { + "External id": 464288,"Record function id": 0, "Ev Idx": 17380 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338707, "tid": 2338707, + "ts": 6267453182483.668, "dur": 1102.391, + "args": { + "External id": 464289,"Record function id": 0, "Ev Idx": 17381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453182580.390, "dur": 29.359, + "args": { + "External id": 464290,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453182628.762, "dur": 43.472, + "args": { + "External id": 464291,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182635.777, "dur": 2.767, + "args": { + "External id": 464292,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182643.196, "dur": 0.496, + "args": { + "External id": 464293,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182645.443, "dur": 0.684, + "args": { + "External id": 464294,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182647.428, "dur": 0.553, + "args": { + "External id": 464295,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182652.227, "dur": 0.405, + "args": { + "External id": 464296,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182654.218, "dur": 0.648, + "args": { + "External id": 464297,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182656.487, "dur": 3.667, + "args": { + "External id": 464298,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182661.415, "dur": 0.320, + "args": { + "External id": 464299,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182663.648, "dur": 0.598, + "args": { + "External id": 464300,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453182685.921, "dur": 65.193, + "args": { + "External id": 464301,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453182795.442, "dur": 142.982, + "args": { + "External id": 464302,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453182808.712, "dur": 5.416, + "args": { + "External id": 464303,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453182820.153, "dur": 12.099, + "args": { + "External id": 464304,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453182825.246, "dur": 6.537, + "args": { + "External id": 464305,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182829.429, "dur": 0.823, + "args": { + "External id": 464306,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453182840.219, "dur": 37.883, + "args": { + "External id": 464307,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182842.855, "dur": 3.381, + "args": { + "External id": 464308,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182848.049, "dur": 0.640, + "args": { + "External id": 464309,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182850.138, "dur": 0.556, + "args": { + "External id": 464310,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182854.848, "dur": 3.154, + "args": { + "External id": 464311,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182859.486, "dur": 0.716, + "args": { + "External id": 464312,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182861.336, "dur": 0.492, + "args": { + "External id": 464313,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182865.995, "dur": 0.698, + "args": { + "External id": 464314,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182868.001, "dur": 0.346, + "args": { + "External id": 464315,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453182870.496, "dur": 2.505, + "args": { + "External id": 464316,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453182893.051, "dur": 36.079, + "args": { + "External id": 464317,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453183003.450, "dur": 465.207, + "args": { + "External id": 464318,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453183039.393, "dur": 422.752, + "args": { + "External id": 464319,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17411, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453183051.666, "dur": 402.897, + "args": { + "External id": 464320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453183499.887, "dur": 3.078, + "args": { + "External id": 464321,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17413, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338707, "tid": 2338707, + "ts": 6267453183632.784, "dur": 31176.132, + "args": { + "External id": 464322,"Record function id": 0, "Ev Idx": 17414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453183750.603, "dur": 7.883, + "args": { + "External id": 464323,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453183762.773, "dur": 1.453, + "args": { + "External id": 464324,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453183766.159, "dur": 3.666, + "args": { + "External id": 464325,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453183771.943, "dur": 1.035, + "args": { + "External id": 464326,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453183774.532, "dur": 1.163, + "args": { + "External id": 464327,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453183777.229, "dur": 1.072, + "args": { + "External id": 464328,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453183782.888, "dur": 1.114, + "args": { + "External id": 464329,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453183785.905, "dur": 3.238, + "args": { + "External id": 464330,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453183791.062, "dur": 0.729, + "args": { + "External id": 464331,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453183793.426, "dur": 0.970, + "args": { + "External id": 464332,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453183818.199, "dur": 30931.711, + "args": { + "External id": 464333,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453183836.819, "dur": 30902.310, + "args": { + "External id": 464334,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453183854.700, "dur": 19.808, + "args": { + "External id": 464335,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453183878.586, "dur": 30812.247, + "args": { + "External id": 464336,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453183881.743, "dur": 30807.273, + "args": { + "External id": 464337,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453183891.938, "dur": 6.774, + "args": { + "External id": 464338,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453183901.021, "dur": 30784.108, + "args": { + "External id": 464339,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453214980.992, "dur": 40.936, + "args": { + "External id": 464340,"Sequence number": 5036618, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17432 + } + }, + { + "ph": "s", "id": 425, "pid": 2338707, "tid": 2338707, "ts": 6267453214980.992, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453215004.430, "dur": 11.774, + "args": { + "External id": 464341,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453215009.662, "dur": 6.330, + "args": { + "External id": 464342,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453215151.565, "dur": 90.068, + "args": { + "External id": 464343,"Record function id": 0, "Ev Idx": 17435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453215243.560, "dur": 1400.454, + "args": { + "External id": 464344,"Record function id": 0, "Ev Idx": 17436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453215291.694, "dur": 1334.959, + "args": { + "External id": 464345,"Sequence number": 5036619, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17437 + } + }, + { + "ph": "s", "id": 424, "pid": 2338707, "tid": 2338707, "ts": 6267453215291.694, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453215377.580, "dur": 64.554, + "args": { + "External id": 464346,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453215459.956, "dur": 122.646, + "args": { + "External id": 464347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453215619.127, "dur": 55.599, + "args": { + "External id": 464348,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453215689.949, "dur": 37.219, + "args": { + "External id": 464349,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453215765.082, "dur": 37.694, + "args": { + "External id": 464350,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453215824.162, "dur": 21.251, + "args": { + "External id": 464351,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453215875.433, "dur": 165.444, + "args": { + "External id": 464352,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453215937.963, "dur": 15.751, + "args": { + "External id": 464353,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453215945.530, "dur": 7.330, + "args": { + "External id": 464354,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453215958.397, "dur": 4.477, + "args": { + "External id": 464355,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453215964.569, "dur": 1.699, + "args": { + "External id": 464356,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453215969.609, "dur": 8.713, + "args": { + "External id": 464357,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453216093.081, "dur": 74.207, + "args": { + "External id": 464358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453216210.759, "dur": 37.672, + "args": { + "External id": 464359,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453216259.567, "dur": 53.582, + "args": { + "External id": 464360,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453216323.991, "dur": 42.935, + "args": { + "External id": 464361,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453216392.614, "dur": 31.671, + "args": { + "External id": 464362,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453216437.092, "dur": 46.059, + "args": { + "External id": 464363,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453216506.551, "dur": 26.976, + "args": { + "External id": 464364,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17456 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2338707, "tid": 2338707, + "ts": 6267453216722.029, "dur": 88.565, + "args": { + "External id": 464365,"Record function id": 0, "Ev Idx": 17457 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453216902.319, "dur": 56.600, + "args": { + "External id": 464366,"Record function id": 0, "Ev Idx": 17458 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2338707, "tid": 2338707, + "ts": 6267453216969.249, "dur": 33031.056, + "args": { + "External id": 464367,"Record function id": 0, "Ev Idx": 17459 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338707, "tid": 2338707, + "ts": 6267453216978.193, "dur": 1240.243, + "args": { + "External id": 464368,"Record function id": 0, "Ev Idx": 17460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453217118.146, "dur": 12.325, + "args": { + "External id": 464369,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453217147.280, "dur": 46.963, + "args": { + "External id": 464370,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217154.238, "dur": 2.903, + "args": { + "External id": 464371,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217163.129, "dur": 0.455, + "args": { + "External id": 464372,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217165.322, "dur": 0.643, + "args": { + "External id": 464373,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217167.313, "dur": 0.727, + "args": { + "External id": 464374,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217172.280, "dur": 0.675, + "args": { + "External id": 464375,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217174.275, "dur": 0.662, + "args": { + "External id": 464376,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217176.374, "dur": 5.526, + "args": { + "External id": 464377,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217183.600, "dur": 0.559, + "args": { + "External id": 464378,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217185.693, "dur": 0.568, + "args": { + "External id": 464379,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453217208.829, "dur": 62.720, + "args": { + "External id": 464380,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453217312.667, "dur": 188.002, + "args": { + "External id": 464381,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453217326.103, "dur": 5.238, + "args": { + "External id": 464382,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453217337.049, "dur": 57.651, + "args": { + "External id": 464383,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453217342.507, "dur": 51.692, + "args": { + "External id": 464384,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217391.707, "dur": 0.812, + "args": { + "External id": 464385,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453217404.708, "dur": 34.293, + "args": { + "External id": 464386,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217406.748, "dur": 3.101, + "args": { + "External id": 464387,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217411.194, "dur": 0.424, + "args": { + "External id": 464388,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217412.872, "dur": 0.502, + "args": { + "External id": 464389,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217417.122, "dur": 2.942, + "args": { + "External id": 464390,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217421.123, "dur": 0.378, + "args": { + "External id": 464391,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217423.031, "dur": 0.536, + "args": { + "External id": 464392,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217426.925, "dur": 0.333, + "args": { + "External id": 464393,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217429.074, "dur": 0.369, + "args": { + "External id": 464394,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453217431.138, "dur": 2.672, + "args": { + "External id": 464395,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453217453.763, "dur": 37.081, + "args": { + "External id": 464396,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453217565.193, "dur": 467.191, + "args": { + "External id": 464397,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453217618.149, "dur": 407.789, + "args": { + "External id": 464398,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17490, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453217632.020, "dur": 386.652, + "args": { + "External id": 464399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453218117.815, "dur": 5.706, + "args": { + "External id": 464400,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17492, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338707, "tid": 2338707, + "ts": 6267453218245.601, "dur": 31525.459, + "args": { + "External id": 464401,"Record function id": 0, "Ev Idx": 17493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453218362.948, "dur": 7.797, + "args": { + "External id": 464402,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453218374.943, "dur": 1.144, + "args": { + "External id": 464403,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453218377.992, "dur": 4.095, + "args": { + "External id": 464404,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453218384.110, "dur": 1.200, + "args": { + "External id": 464405,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453218386.850, "dur": 1.372, + "args": { + "External id": 464406,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453218389.738, "dur": 1.049, + "args": { + "External id": 464407,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453218392.728, "dur": 0.795, + "args": { + "External id": 464408,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453218395.840, "dur": 2.667, + "args": { + "External id": 464409,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453218402.646, "dur": 1.348, + "args": { + "External id": 464410,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453218405.675, "dur": 1.396, + "args": { + "External id": 464411,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453218428.337, "dur": 31291.470, + "args": { + "External id": 464412,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453218447.397, "dur": 31261.996, + "args": { + "External id": 464413,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453218468.001, "dur": 18.652, + "args": { + "External id": 464414,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453218490.715, "dur": 31175.118, + "args": { + "External id": 464415,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453218493.970, "dur": 31169.995, + "args": { + "External id": 464416,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453218501.232, "dur": 6.732, + "args": { + "External id": 464417,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453218512.269, "dur": 31148.028, + "args": { + "External id": 464418,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453249934.094, "dur": 36.309, + "args": { + "External id": 464419,"Sequence number": 5036620, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17511 + } + }, + { + "ph": "s", "id": 423, "pid": 2338707, "tid": 2338707, "ts": 6267453249934.094, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453249954.458, "dur": 10.528, + "args": { + "External id": 464420,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453249959.677, "dur": 4.909, + "args": { + "External id": 464421,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453250049.526, "dur": 139.801, + "args": { + "External id": 464422,"Record function id": 0, "Ev Idx": 17514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453250192.738, "dur": 1384.542, + "args": { + "External id": 464423,"Record function id": 0, "Ev Idx": 17515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453250239.072, "dur": 1322.330, + "args": { + "External id": 464424,"Sequence number": 5036621, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17516 + } + }, + { + "ph": "s", "id": 422, "pid": 2338707, "tid": 2338707, "ts": 6267453250239.072, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453250328.764, "dur": 69.417, + "args": { + "External id": 464425,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453250415.699, "dur": 122.558, + "args": { + "External id": 464426,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453250553.887, "dur": 70.691, + "args": { + "External id": 464427,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453250641.675, "dur": 44.626, + "args": { + "External id": 464428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453250723.309, "dur": 33.007, + "args": { + "External id": 464429,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453250783.288, "dur": 22.988, + "args": { + "External id": 464430,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453250836.513, "dur": 161.186, + "args": { + "External id": 464431,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453250898.403, "dur": 15.383, + "args": { + "External id": 464432,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453250905.815, "dur": 7.034, + "args": { + "External id": 464433,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453250916.782, "dur": 5.360, + "args": { + "External id": 464434,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453250923.812, "dur": 1.509, + "args": { + "External id": 464435,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453250928.872, "dur": 6.018, + "args": { + "External id": 464436,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453251010.456, "dur": 104.768, + "args": { + "External id": 464437,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453251157.584, "dur": 37.441, + "args": { + "External id": 464438,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453251207.240, "dur": 59.782, + "args": { + "External id": 464439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453251280.385, "dur": 41.568, + "args": { + "External id": 464440,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453251347.557, "dur": 34.278, + "args": { + "External id": 464441,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453251391.965, "dur": 43.278, + "args": { + "External id": 464442,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453251459.158, "dur": 20.784, + "args": { + "External id": 464443,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17535 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2338707, "tid": 2338707, + "ts": 6267453251672.898, "dur": 92.865, + "args": { + "External id": 464444,"Record function id": 0, "Ev Idx": 17536 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453251857.692, "dur": 57.500, + "args": { + "External id": 464445,"Record function id": 0, "Ev Idx": 17537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2338707, "tid": 2338707, + "ts": 6267453251926.377, "dur": 31630.111, + "args": { + "External id": 464446,"Record function id": 0, "Ev Idx": 17538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338707, "tid": 2338707, + "ts": 6267453251935.645, "dur": 1159.764, + "args": { + "External id": 464447,"Record function id": 0, "Ev Idx": 17539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453252027.690, "dur": 12.092, + "args": { + "External id": 464448,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453252093.272, "dur": 50.250, + "args": { + "External id": 464449,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252102.708, "dur": 2.977, + "args": { + "External id": 464450,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252111.175, "dur": 0.471, + "args": { + "External id": 464451,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252113.568, "dur": 0.644, + "args": { + "External id": 464452,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252116.359, "dur": 0.574, + "args": { + "External id": 464453,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252121.120, "dur": 0.446, + "args": { + "External id": 464454,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252123.110, "dur": 0.460, + "args": { + "External id": 464455,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252125.478, "dur": 6.148, + "args": { + "External id": 464456,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252132.946, "dur": 0.450, + "args": { + "External id": 464457,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252135.123, "dur": 0.465, + "args": { + "External id": 464458,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453252157.884, "dur": 65.789, + "args": { + "External id": 464459,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453252273.164, "dur": 153.205, + "args": { + "External id": 464460,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453252290.965, "dur": 8.937, + "args": { + "External id": 464461,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453252306.590, "dur": 12.945, + "args": { + "External id": 464462,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453252312.218, "dur": 6.854, + "args": { + "External id": 464463,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252316.838, "dur": 0.858, + "args": { + "External id": 464464,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453252328.692, "dur": 35.614, + "args": { + "External id": 464465,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252330.814, "dur": 0.660, + "args": { + "External id": 464466,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252333.094, "dur": 2.829, + "args": { + "External id": 464467,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252336.934, "dur": 0.653, + "args": { + "External id": 464468,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252339.198, "dur": 2.844, + "args": { + "External id": 464469,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252346.351, "dur": 0.352, + "args": { + "External id": 464470,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252348.574, "dur": 0.518, + "args": { + "External id": 464471,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252350.007, "dur": 0.550, + "args": { + "External id": 464472,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252355.058, "dur": 0.480, + "args": { + "External id": 464473,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453252357.522, "dur": 0.440, + "args": { + "External id": 464474,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453252380.407, "dur": 35.925, + "args": { + "External id": 464475,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453252490.837, "dur": 446.920, + "args": { + "External id": 464476,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453252526.739, "dur": 403.796, + "args": { + "External id": 464477,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17569, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453252538.521, "dur": 384.766, + "args": { + "External id": 464478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453252970.792, "dur": 2.946, + "args": { + "External id": 464479,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17571, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338707, "tid": 2338707, + "ts": 6267453253128.469, "dur": 30175.179, + "args": { + "External id": 464480,"Record function id": 0, "Ev Idx": 17572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453253246.400, "dur": 8.070, + "args": { + "External id": 464481,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453253258.320, "dur": 1.277, + "args": { + "External id": 464482,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453253261.607, "dur": 4.422, + "args": { + "External id": 464483,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453253267.927, "dur": 1.029, + "args": { + "External id": 464484,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453253270.694, "dur": 1.192, + "args": { + "External id": 464485,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453253273.157, "dur": 0.853, + "args": { + "External id": 464486,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453253275.974, "dur": 1.092, + "args": { + "External id": 464487,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453253278.815, "dur": 3.582, + "args": { + "External id": 464488,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453253284.172, "dur": 1.051, + "args": { + "External id": 464489,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453253289.662, "dur": 0.865, + "args": { + "External id": 464490,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453253310.463, "dur": 29936.930, + "args": { + "External id": 464491,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453253329.715, "dur": 29905.900, + "args": { + "External id": 464492,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453253347.329, "dur": 18.160, + "args": { + "External id": 464493,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453253369.347, "dur": 29822.122, + "args": { + "External id": 464494,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453253372.458, "dur": 29817.697, + "args": { + "External id": 464495,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453253381.796, "dur": 6.684, + "args": { + "External id": 464496,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453253390.613, "dur": 29795.060, + "args": { + "External id": 464497,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453283478.809, "dur": 41.845, + "args": { + "External id": 464498,"Sequence number": 5036622, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17590 + } + }, + { + "ph": "s", "id": 421, "pid": 2338707, "tid": 2338707, "ts": 6267453283478.809, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453283501.210, "dur": 12.705, + "args": { + "External id": 464499,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453283506.565, "dur": 7.070, + "args": { + "External id": 464500,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453283631.838, "dur": 95.200, + "args": { + "External id": 464501,"Record function id": 0, "Ev Idx": 17593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453283729.273, "dur": 1458.814, + "args": { + "External id": 464502,"Record function id": 0, "Ev Idx": 17594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453283785.668, "dur": 1382.994, + "args": { + "External id": 464503,"Sequence number": 5036623, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17595 + } + }, + { + "ph": "s", "id": 420, "pid": 2338707, "tid": 2338707, "ts": 6267453283785.668, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453283878.253, "dur": 65.983, + "args": { + "External id": 464504,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453283962.325, "dur": 172.099, + "args": { + "External id": 464505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453284160.104, "dur": 52.980, + "args": { + "External id": 464506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453284221.775, "dur": 35.716, + "args": { + "External id": 464507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453284293.610, "dur": 34.519, + "args": { + "External id": 464508,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453284354.686, "dur": 21.099, + "args": { + "External id": 464509,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453284405.972, "dur": 163.513, + "args": { + "External id": 464510,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453284466.480, "dur": 15.194, + "args": { + "External id": 464511,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453284473.644, "dur": 7.078, + "args": { + "External id": 464512,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453284485.799, "dur": 4.317, + "args": { + "External id": 464513,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453284491.659, "dur": 1.108, + "args": { + "External id": 464514,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453284498.525, "dur": 6.229, + "args": { + "External id": 464515,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453284583.378, "dur": 84.207, + "args": { + "External id": 464516,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453284713.866, "dur": 38.171, + "args": { + "External id": 464517,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453284763.946, "dur": 57.714, + "args": { + "External id": 464518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453284831.701, "dur": 44.023, + "args": { + "External id": 464519,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453284904.458, "dur": 37.406, + "args": { + "External id": 464520,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453284948.721, "dur": 43.740, + "args": { + "External id": 464521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453285015.102, "dur": 22.705, + "args": { + "External id": 464522,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17614 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2338707, "tid": 2338707, + "ts": 6267453285269.999, "dur": 96.239, + "args": { + "External id": 464523,"Record function id": 0, "Ev Idx": 17615 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453285456.236, "dur": 55.872, + "args": { + "External id": 464524,"Record function id": 0, "Ev Idx": 17616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2338707, "tid": 2338707, + "ts": 6267453285522.853, "dur": 32226.374, + "args": { + "External id": 464525,"Record function id": 0, "Ev Idx": 17617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338707, "tid": 2338707, + "ts": 6267453285530.655, "dur": 1168.557, + "args": { + "External id": 464526,"Record function id": 0, "Ev Idx": 17618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453285645.547, "dur": 13.228, + "args": { + "External id": 464527,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453285676.943, "dur": 50.386, + "args": { + "External id": 464528,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285683.740, "dur": 2.770, + "args": { + "External id": 464529,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285691.726, "dur": 0.649, + "args": { + "External id": 464530,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285695.801, "dur": 0.664, + "args": { + "External id": 464531,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285697.893, "dur": 0.829, + "args": { + "External id": 464532,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285703.235, "dur": 0.566, + "args": { + "External id": 464533,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285705.426, "dur": 0.809, + "args": { + "External id": 464534,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285708.162, "dur": 5.036, + "args": { + "External id": 464535,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285714.828, "dur": 0.550, + "args": { + "External id": 464536,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285717.502, "dur": 0.493, + "args": { + "External id": 464537,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453285741.681, "dur": 64.418, + "args": { + "External id": 464538,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453285847.950, "dur": 140.832, + "args": { + "External id": 464539,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453285863.003, "dur": 4.635, + "args": { + "External id": 464540,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453285873.816, "dur": 12.467, + "args": { + "External id": 464541,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453285879.247, "dur": 6.534, + "args": { + "External id": 464542,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285883.505, "dur": 0.911, + "args": { + "External id": 464543,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453285894.346, "dur": 35.050, + "args": { + "External id": 464544,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285896.384, "dur": 3.712, + "args": { + "External id": 464545,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285901.836, "dur": 0.693, + "args": { + "External id": 464546,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285903.958, "dur": 0.341, + "args": { + "External id": 464547,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285908.710, "dur": 2.744, + "args": { + "External id": 464548,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285912.612, "dur": 0.444, + "args": { + "External id": 464549,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285914.511, "dur": 0.477, + "args": { + "External id": 464550,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285916.636, "dur": 0.599, + "args": { + "External id": 464551,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285919.156, "dur": 0.573, + "args": { + "External id": 464552,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453285921.204, "dur": 2.789, + "args": { + "External id": 464553,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453285943.496, "dur": 35.307, + "args": { + "External id": 464554,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453286051.704, "dur": 506.123, + "args": { + "External id": 464555,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453286128.450, "dur": 422.800, + "args": { + "External id": 464556,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17648, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453286141.730, "dur": 402.726, + "args": { + "External id": 464557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453286588.655, "dur": 2.706, + "args": { + "External id": 464558,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17650, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338707, "tid": 2338707, + "ts": 6267453286725.282, "dur": 30732.189, + "args": { + "External id": 464559,"Record function id": 0, "Ev Idx": 17651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453286837.182, "dur": 7.811, + "args": { + "External id": 464560,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453286848.871, "dur": 1.314, + "args": { + "External id": 464561,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453286852.362, "dur": 3.679, + "args": { + "External id": 464562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453286858.179, "dur": 0.986, + "args": { + "External id": 464563,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453286860.639, "dur": 1.293, + "args": { + "External id": 464564,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453286863.599, "dur": 0.906, + "args": { + "External id": 464565,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453286866.119, "dur": 1.029, + "args": { + "External id": 464566,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453286869.212, "dur": 3.407, + "args": { + "External id": 464567,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453286874.078, "dur": 0.836, + "args": { + "External id": 464568,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453286879.408, "dur": 1.281, + "args": { + "External id": 464569,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453286901.473, "dur": 30492.563, + "args": { + "External id": 464570,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453286920.057, "dur": 30461.709, + "args": { + "External id": 464571,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453286937.126, "dur": 18.729, + "args": { + "External id": 464572,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453286960.052, "dur": 30372.523, + "args": { + "External id": 464573,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453286963.330, "dur": 30367.339, + "args": { + "External id": 464574,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453286972.691, "dur": 6.533, + "args": { + "External id": 464575,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453286981.272, "dur": 30345.834, + "args": { + "External id": 464576,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453317657.082, "dur": 49.335, + "args": { + "External id": 464577,"Sequence number": 5036624, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17669 + } + }, + { + "ph": "s", "id": 419, "pid": 2338707, "tid": 2338707, "ts": 6267453317657.082, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453317685.605, "dur": 13.954, + "args": { + "External id": 464578,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453317691.647, "dur": 7.542, + "args": { + "External id": 464579,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453317803.233, "dur": 94.220, + "args": { + "External id": 464580,"Record function id": 0, "Ev Idx": 17672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453317899.316, "dur": 1488.891, + "args": { + "External id": 464581,"Record function id": 0, "Ev Idx": 17673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453317943.719, "dur": 1429.364, + "args": { + "External id": 464582,"Sequence number": 5036625, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17674 + } + }, + { + "ph": "s", "id": 418, "pid": 2338707, "tid": 2338707, "ts": 6267453317943.719, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453318031.779, "dur": 131.540, + "args": { + "External id": 464583,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453318189.382, "dur": 118.830, + "args": { + "External id": 464584,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453318324.400, "dur": 44.329, + "args": { + "External id": 464585,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453318379.278, "dur": 34.699, + "args": { + "External id": 464586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453318497.776, "dur": 37.772, + "args": { + "External id": 464587,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453318568.014, "dur": 20.849, + "args": { + "External id": 464588,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453318636.306, "dur": 167.839, + "args": { + "External id": 464589,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453318700.253, "dur": 15.812, + "args": { + "External id": 464590,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453318707.490, "dur": 7.447, + "args": { + "External id": 464591,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453318720.360, "dur": 5.556, + "args": { + "External id": 464592,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453318727.654, "dur": 1.010, + "args": { + "External id": 464593,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453318731.419, "dur": 6.064, + "args": { + "External id": 464594,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453318818.408, "dur": 68.808, + "args": { + "External id": 464595,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453318928.331, "dur": 34.740, + "args": { + "External id": 464596,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453318973.904, "dur": 53.114, + "args": { + "External id": 464597,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453319041.364, "dur": 82.125, + "args": { + "External id": 464598,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453319158.933, "dur": 33.211, + "args": { + "External id": 464599,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453319201.963, "dur": 46.502, + "args": { + "External id": 464600,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453319268.848, "dur": 20.548, + "args": { + "External id": 464601,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17693 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2338707, "tid": 2338707, + "ts": 6267453319466.238, "dur": 93.435, + "args": { + "External id": 464602,"Record function id": 0, "Ev Idx": 17694 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453319673.180, "dur": 58.722, + "args": { + "External id": 464603,"Record function id": 0, "Ev Idx": 17695 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2338707, "tid": 2338707, + "ts": 6267453319742.963, "dur": 35765.458, + "args": { + "External id": 464604,"Record function id": 0, "Ev Idx": 17696 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338707, "tid": 2338707, + "ts": 6267453319751.946, "dur": 1046.362, + "args": { + "External id": 464605,"Record function id": 0, "Ev Idx": 17697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453319847.688, "dur": 12.705, + "args": { + "External id": 464606,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453319877.630, "dur": 47.295, + "args": { + "External id": 464607,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453319884.477, "dur": 3.019, + "args": { + "External id": 464608,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453319893.224, "dur": 0.566, + "args": { + "External id": 464609,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453319895.367, "dur": 0.431, + "args": { + "External id": 464610,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453319898.177, "dur": 0.799, + "args": { + "External id": 464611,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453319902.840, "dur": 0.587, + "args": { + "External id": 464612,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453319905.267, "dur": 0.837, + "args": { + "External id": 464613,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453319907.872, "dur": 4.532, + "args": { + "External id": 464614,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453319913.554, "dur": 0.507, + "args": { + "External id": 464615,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453319915.696, "dur": 0.469, + "args": { + "External id": 464616,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453319938.728, "dur": 64.527, + "args": { + "External id": 464617,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453320044.134, "dur": 199.287, + "args": { + "External id": 464618,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453320099.214, "dur": 7.673, + "args": { + "External id": 464619,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453320113.973, "dur": 13.816, + "args": { + "External id": 464620,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453320119.447, "dur": 7.843, + "args": { + "External id": 464621,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453320124.082, "dur": 1.233, + "args": { + "External id": 464622,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453320136.987, "dur": 37.020, + "args": { + "External id": 464623,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453320139.450, "dur": 3.438, + "args": { + "External id": 464624,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453320144.570, "dur": 0.499, + "args": { + "External id": 464625,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453320146.683, "dur": 0.485, + "args": { + "External id": 464626,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453320152.240, "dur": 2.930, + "args": { + "External id": 464627,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453320156.157, "dur": 0.554, + "args": { + "External id": 464628,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453320158.614, "dur": 0.274, + "args": { + "External id": 464629,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453320162.398, "dur": 0.361, + "args": { + "External id": 464630,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453320164.493, "dur": 0.369, + "args": { + "External id": 464631,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453320165.927, "dur": 2.550, + "args": { + "External id": 464632,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453320190.827, "dur": 41.849, + "args": { + "External id": 464633,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453320309.072, "dur": 371.497, + "args": { + "External id": 464634,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453320348.417, "dur": 326.174, + "args": { + "External id": 464635,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17727, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453320360.614, "dur": 306.939, + "args": { + "External id": 464636,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453320712.055, "dur": 3.037, + "args": { + "External id": 464637,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17729, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338707, "tid": 2338707, + "ts": 6267453320823.862, "dur": 34439.463, + "args": { + "External id": 464638,"Record function id": 0, "Ev Idx": 17730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453320935.825, "dur": 7.763, + "args": { + "External id": 464639,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453320947.732, "dur": 1.177, + "args": { + "External id": 464640,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453320950.659, "dur": 3.334, + "args": { + "External id": 464641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453320955.952, "dur": 0.934, + "args": { + "External id": 464642,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453320958.304, "dur": 1.072, + "args": { + "External id": 464643,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453320960.753, "dur": 1.198, + "args": { + "External id": 464644,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453320963.603, "dur": 1.057, + "args": { + "External id": 464645,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453320966.385, "dur": 3.193, + "args": { + "External id": 464646,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453320971.123, "dur": 1.134, + "args": { + "External id": 464647,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453320974.043, "dur": 0.941, + "args": { + "External id": 464648,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453320997.483, "dur": 34212.877, + "args": { + "External id": 464649,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453321015.031, "dur": 34185.081, + "args": { + "External id": 464650,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453321031.869, "dur": 18.430, + "args": { + "External id": 464651,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453321054.232, "dur": 34103.219, + "args": { + "External id": 464652,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453321098.283, "dur": 34057.424, + "args": { + "External id": 464653,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453321109.232, "dur": 8.096, + "args": { + "External id": 464654,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453321120.097, "dur": 34032.024, + "args": { + "External id": 464655,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453355434.174, "dur": 42.967, + "args": { + "External id": 464656,"Sequence number": 5036626, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17748 + } + }, + { + "ph": "s", "id": 417, "pid": 2338707, "tid": 2338707, "ts": 6267453355434.174, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453355458.240, "dur": 12.394, + "args": { + "External id": 464657,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453355463.596, "dur": 6.625, + "args": { + "External id": 464658,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453355558.261, "dur": 105.071, + "args": { + "External id": 464659,"Record function id": 0, "Ev Idx": 17751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453355666.158, "dur": 1374.952, + "args": { + "External id": 464660,"Record function id": 0, "Ev Idx": 17752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453355713.323, "dur": 1311.908, + "args": { + "External id": 464661,"Sequence number": 5036627, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17753 + } + }, + { + "ph": "s", "id": 416, "pid": 2338707, "tid": 2338707, "ts": 6267453355713.323, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453355798.042, "dur": 60.740, + "args": { + "External id": 464662,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453355875.941, "dur": 121.872, + "args": { + "External id": 464663,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453356017.941, "dur": 88.525, + "args": { + "External id": 464664,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453356122.797, "dur": 43.715, + "args": { + "External id": 464665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453356205.303, "dur": 33.073, + "args": { + "External id": 464666,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453356263.764, "dur": 23.304, + "args": { + "External id": 464667,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453356317.856, "dur": 171.983, + "args": { + "External id": 464668,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453356382.923, "dur": 15.867, + "args": { + "External id": 464669,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453356390.257, "dur": 7.454, + "args": { + "External id": 464670,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453356403.403, "dur": 4.561, + "args": { + "External id": 464671,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453356409.471, "dur": 1.400, + "args": { + "External id": 464672,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453356417.544, "dur": 6.813, + "args": { + "External id": 464673,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453356504.417, "dur": 63.554, + "args": { + "External id": 464674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453356623.725, "dur": 39.124, + "args": { + "External id": 464675,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453356676.741, "dur": 59.171, + "args": { + "External id": 464676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453356746.853, "dur": 41.493, + "args": { + "External id": 464677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453356815.889, "dur": 35.707, + "args": { + "External id": 464678,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453356858.479, "dur": 43.567, + "args": { + "External id": 464679,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453356922.691, "dur": 22.494, + "args": { + "External id": 464680,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2338707, "tid": 2338707, + "ts": 6267453357157.874, "dur": 91.827, + "args": { + "External id": 464681,"Record function id": 0, "Ev Idx": 17773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453357340.399, "dur": 57.249, + "args": { + "External id": 464682,"Record function id": 0, "Ev Idx": 17774 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2338707, "tid": 2338707, + "ts": 6267453357408.141, "dur": 34101.105, + "args": { + "External id": 464683,"Record function id": 0, "Ev Idx": 17775 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338707, "tid": 2338707, + "ts": 6267453357417.606, "dur": 1119.451, + "args": { + "External id": 464684,"Record function id": 0, "Ev Idx": 17776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453357515.740, "dur": 11.129, + "args": { + "External id": 464685,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453357543.952, "dur": 69.785, + "args": { + "External id": 464686,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357551.259, "dur": 2.922, + "args": { + "External id": 464687,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357559.639, "dur": 0.628, + "args": { + "External id": 464688,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357562.082, "dur": 0.617, + "args": { + "External id": 464689,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357564.372, "dur": 0.482, + "args": { + "External id": 464690,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357568.946, "dur": 0.614, + "args": { + "External id": 464691,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357576.711, "dur": 0.506, + "args": { + "External id": 464692,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357579.120, "dur": 4.860, + "args": { + "External id": 464693,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357585.286, "dur": 0.352, + "args": { + "External id": 464694,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357589.837, "dur": 0.549, + "args": { + "External id": 464695,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453357630.642, "dur": 66.705, + "args": { + "External id": 464696,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453357738.855, "dur": 149.857, + "args": { + "External id": 464697,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453357752.885, "dur": 9.847, + "args": { + "External id": 464698,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453357769.171, "dur": 14.878, + "args": { + "External id": 464699,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453357774.311, "dur": 9.274, + "args": { + "External id": 464700,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357778.716, "dur": 3.312, + "args": { + "External id": 464701,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453357792.341, "dur": 34.050, + "args": { + "External id": 464702,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357794.791, "dur": 0.631, + "args": { + "External id": 464703,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357796.932, "dur": 0.805, + "args": { + "External id": 464704,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357799.268, "dur": 0.443, + "args": { + "External id": 464705,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357803.633, "dur": 2.839, + "args": { + "External id": 464706,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357807.581, "dur": 0.425, + "args": { + "External id": 464707,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357809.418, "dur": 2.868, + "args": { + "External id": 464708,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357814.400, "dur": 0.328, + "args": { + "External id": 464709,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357816.098, "dur": 0.374, + "args": { + "External id": 464710,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453357820.398, "dur": 0.370, + "args": { + "External id": 464711,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453357840.541, "dur": 38.618, + "args": { + "External id": 464712,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453357952.091, "dur": 465.001, + "args": { + "External id": 464713,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453357986.775, "dur": 423.282, + "args": { + "External id": 464714,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17806, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453357998.573, "dur": 403.977, + "args": { + "External id": 464715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453358450.330, "dur": 2.723, + "args": { + "External id": 464716,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17808, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338707, "tid": 2338707, + "ts": 6267453358562.461, "dur": 32692.841, + "args": { + "External id": 464717,"Record function id": 0, "Ev Idx": 17809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453358694.925, "dur": 7.567, + "args": { + "External id": 464718,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453358709.714, "dur": 1.303, + "args": { + "External id": 464719,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453358712.909, "dur": 3.702, + "args": { + "External id": 464720,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453358718.640, "dur": 1.024, + "args": { + "External id": 464721,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453358721.408, "dur": 0.923, + "args": { + "External id": 464722,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453358723.892, "dur": 0.970, + "args": { + "External id": 464723,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453358726.512, "dur": 0.679, + "args": { + "External id": 464724,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453358729.170, "dur": 2.700, + "args": { + "External id": 464725,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453358733.616, "dur": 0.941, + "args": { + "External id": 464726,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453358739.172, "dur": 1.096, + "args": { + "External id": 464727,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453358765.316, "dur": 32429.887, + "args": { + "External id": 464728,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453358784.724, "dur": 32399.623, + "args": { + "External id": 464729,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453358805.422, "dur": 19.689, + "args": { + "External id": 464730,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453358829.169, "dur": 32306.209, + "args": { + "External id": 464731,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453358832.452, "dur": 32301.181, + "args": { + "External id": 464732,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453358839.071, "dur": 6.927, + "args": { + "External id": 464733,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453358848.251, "dur": 32281.773, + "args": { + "External id": 464734,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453391430.577, "dur": 41.440, + "args": { + "External id": 464735,"Sequence number": 5036628, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17827 + } + }, + { + "ph": "s", "id": 415, "pid": 2338707, "tid": 2338707, "ts": 6267453391430.577, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453391453.355, "dur": 12.047, + "args": { + "External id": 464736,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453391458.765, "dur": 6.397, + "args": { + "External id": 464737,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453391569.628, "dur": 107.602, + "args": { + "External id": 464738,"Record function id": 0, "Ev Idx": 17830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453391680.339, "dur": 1448.573, + "args": { + "External id": 464739,"Record function id": 0, "Ev Idx": 17831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453391730.764, "dur": 1379.711, + "args": { + "External id": 464740,"Sequence number": 5036629, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17832 + } + }, + { + "ph": "s", "id": 414, "pid": 2338707, "tid": 2338707, "ts": 6267453391730.764, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453391822.070, "dur": 63.400, + "args": { + "External id": 464741,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453391902.314, "dur": 123.507, + "args": { + "External id": 464742,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453392042.577, "dur": 98.773, + "args": { + "External id": 464743,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453392161.046, "dur": 41.811, + "args": { + "External id": 464744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453392239.484, "dur": 36.088, + "args": { + "External id": 464745,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453392307.453, "dur": 21.533, + "args": { + "External id": 464746,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453392357.409, "dur": 168.225, + "args": { + "External id": 464747,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453392425.278, "dur": 15.382, + "args": { + "External id": 464748,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453392432.525, "dur": 6.911, + "args": { + "External id": 464749,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453392444.839, "dur": 5.057, + "args": { + "External id": 464750,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453392451.247, "dur": 1.253, + "args": { + "External id": 464751,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453392455.646, "dur": 6.949, + "args": { + "External id": 464752,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453392539.688, "dur": 81.489, + "args": { + "External id": 464753,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453392665.052, "dur": 39.307, + "args": { + "External id": 464754,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453392715.629, "dur": 55.565, + "args": { + "External id": 464755,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453392785.567, "dur": 43.049, + "args": { + "External id": 464756,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453392858.829, "dur": 31.635, + "args": { + "External id": 464757,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453392902.990, "dur": 43.076, + "args": { + "External id": 464758,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453392968.175, "dur": 23.064, + "args": { + "External id": 464759,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17851 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2338707, "tid": 2338707, + "ts": 6267453393207.952, "dur": 92.466, + "args": { + "External id": 464760,"Record function id": 0, "Ev Idx": 17852 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453393393.964, "dur": 60.671, + "args": { + "External id": 464761,"Record function id": 0, "Ev Idx": 17853 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2338707, "tid": 2338707, + "ts": 6267453393464.443, "dur": 33250.516, + "args": { + "External id": 464762,"Record function id": 0, "Ev Idx": 17854 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338707, "tid": 2338707, + "ts": 6267453393475.303, "dur": 1111.791, + "args": { + "External id": 464763,"Record function id": 0, "Ev Idx": 17855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453393572.064, "dur": 11.885, + "args": { + "External id": 464764,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453393620.351, "dur": 44.799, + "args": { + "External id": 464765,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393627.186, "dur": 2.981, + "args": { + "External id": 464766,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393635.742, "dur": 0.773, + "args": { + "External id": 464767,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393638.679, "dur": 0.818, + "args": { + "External id": 464768,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393640.790, "dur": 0.512, + "args": { + "External id": 464769,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393645.254, "dur": 0.683, + "args": { + "External id": 464770,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393647.294, "dur": 0.664, + "args": { + "External id": 464771,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393649.457, "dur": 2.947, + "args": { + "External id": 464772,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393653.831, "dur": 0.486, + "args": { + "External id": 464773,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393656.159, "dur": 0.620, + "args": { + "External id": 464774,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453393679.242, "dur": 69.567, + "args": { + "External id": 464775,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453393790.229, "dur": 147.208, + "args": { + "External id": 464776,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453393804.366, "dur": 5.665, + "args": { + "External id": 464777,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453393816.084, "dur": 12.353, + "args": { + "External id": 464778,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453393821.895, "dur": 6.084, + "args": { + "External id": 464779,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393825.863, "dur": 0.690, + "args": { + "External id": 464780,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453393835.694, "dur": 37.283, + "args": { + "External id": 464781,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393838.551, "dur": 3.646, + "args": { + "External id": 464782,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393843.590, "dur": 0.518, + "args": { + "External id": 464783,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393845.779, "dur": 0.503, + "args": { + "External id": 464784,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393850.306, "dur": 2.698, + "args": { + "External id": 464785,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393854.237, "dur": 0.561, + "args": { + "External id": 464786,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393856.103, "dur": 0.276, + "args": { + "External id": 464787,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393860.764, "dur": 0.413, + "args": { + "External id": 464788,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393862.507, "dur": 0.406, + "args": { + "External id": 464789,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453393864.316, "dur": 2.376, + "args": { + "External id": 464790,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453393888.879, "dur": 38.471, + "args": { + "External id": 464791,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453394007.139, "dur": 463.292, + "args": { + "External id": 464792,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453394047.251, "dur": 416.808, + "args": { + "External id": 464793,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17885, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453394097.848, "dur": 357.639, + "args": { + "External id": 464794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453394501.129, "dur": 2.817, + "args": { + "External id": 464795,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17887, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338707, "tid": 2338707, + "ts": 6267453394631.348, "dur": 31799.542, + "args": { + "External id": 464796,"Record function id": 0, "Ev Idx": 17888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453394746.985, "dur": 8.027, + "args": { + "External id": 464797,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453394758.853, "dur": 1.159, + "args": { + "External id": 464798,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453394761.927, "dur": 3.362, + "args": { + "External id": 464799,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453394767.315, "dur": 1.465, + "args": { + "External id": 464800,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453394770.621, "dur": 1.169, + "args": { + "External id": 464801,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453394773.306, "dur": 1.114, + "args": { + "External id": 464802,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453394776.040, "dur": 1.131, + "args": { + "External id": 464803,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453394779.781, "dur": 3.106, + "args": { + "External id": 464804,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453394784.397, "dur": 1.228, + "args": { + "External id": 464805,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453394787.340, "dur": 0.764, + "args": { + "External id": 464806,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453394818.224, "dur": 31547.933, + "args": { + "External id": 464807,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453394836.283, "dur": 31517.834, + "args": { + "External id": 464808,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453394855.001, "dur": 19.893, + "args": { + "External id": 464809,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453394879.040, "dur": 31423.927, + "args": { + "External id": 464810,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453394882.052, "dur": 31419.957, + "args": { + "External id": 464811,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453394890.317, "dur": 6.689, + "args": { + "External id": 464812,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453394899.141, "dur": 31397.623, + "args": { + "External id": 464813,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453426632.448, "dur": 49.083, + "args": { + "External id": 464814,"Sequence number": 5036630, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17906 + } + }, + { + "ph": "s", "id": 413, "pid": 2338707, "tid": 2338707, "ts": 6267453426632.448, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453426658.868, "dur": 15.856, + "args": { + "External id": 464815,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453426665.221, "dur": 8.943, + "args": { + "External id": 464816,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453426769.601, "dur": 92.086, + "args": { + "External id": 464817,"Record function id": 0, "Ev Idx": 17909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453426863.398, "dur": 1419.331, + "args": { + "External id": 464818,"Record function id": 0, "Ev Idx": 17910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453426912.272, "dur": 1354.401, + "args": { + "External id": 464819,"Sequence number": 5036631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17911 + } + }, + { + "ph": "s", "id": 412, "pid": 2338707, "tid": 2338707, "ts": 6267453426912.272, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453426997.734, "dur": 100.401, + "args": { + "External id": 464820,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453427122.065, "dur": 119.584, + "args": { + "External id": 464821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453427259.393, "dur": 47.249, + "args": { + "External id": 464822,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453427317.820, "dur": 35.961, + "args": { + "External id": 464823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453427387.310, "dur": 33.719, + "args": { + "External id": 464824,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453427449.322, "dur": 21.493, + "args": { + "External id": 464825,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453427500.029, "dur": 189.141, + "args": { + "External id": 464826,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453427564.972, "dur": 15.281, + "args": { + "External id": 464827,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453427572.186, "dur": 7.129, + "args": { + "External id": 464828,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453427584.437, "dur": 5.124, + "args": { + "External id": 464829,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453427607.239, "dur": 3.914, + "args": { + "External id": 464830,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453427615.703, "dur": 6.381, + "args": { + "External id": 464831,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453427703.757, "dur": 67.168, + "args": { + "External id": 464832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453427811.379, "dur": 34.389, + "args": { + "External id": 464833,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453427858.078, "dur": 57.562, + "args": { + "External id": 464834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453427934.179, "dur": 42.315, + "args": { + "External id": 464835,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453428005.283, "dur": 31.814, + "args": { + "External id": 464836,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453428047.208, "dur": 86.794, + "args": { + "External id": 464837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453428160.862, "dur": 24.745, + "args": { + "External id": 464838,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17930 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2338707, "tid": 2338707, + "ts": 6267453428357.713, "dur": 93.223, + "args": { + "External id": 464839,"Record function id": 0, "Ev Idx": 17931 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453428539.387, "dur": 78.385, + "args": { + "External id": 464840,"Record function id": 0, "Ev Idx": 17932 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2338707, "tid": 2338707, + "ts": 6267453428631.243, "dur": 32872.227, + "args": { + "External id": 464841,"Record function id": 0, "Ev Idx": 17933 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338707, "tid": 2338707, + "ts": 6267453428641.069, "dur": 1113.703, + "args": { + "External id": 464842,"Record function id": 0, "Ev Idx": 17934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453428739.630, "dur": 11.144, + "args": { + "External id": 464843,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453428767.713, "dur": 47.103, + "args": { + "External id": 464844,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428774.851, "dur": 2.953, + "args": { + "External id": 464845,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428782.644, "dur": 0.541, + "args": { + "External id": 464846,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428784.842, "dur": 0.628, + "args": { + "External id": 464847,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428786.980, "dur": 0.634, + "args": { + "External id": 464848,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428792.097, "dur": 0.665, + "args": { + "External id": 464849,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428793.746, "dur": 0.531, + "args": { + "External id": 464850,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428796.054, "dur": 4.936, + "args": { + "External id": 464851,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428802.169, "dur": 0.448, + "args": { + "External id": 464852,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428804.216, "dur": 0.516, + "args": { + "External id": 464853,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453428829.348, "dur": 70.907, + "args": { + "External id": 464854,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453428942.688, "dur": 190.601, + "args": { + "External id": 464855,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453428955.711, "dur": 6.702, + "args": { + "External id": 464856,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453428969.144, "dur": 12.874, + "args": { + "External id": 464857,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453428974.664, "dur": 6.741, + "args": { + "External id": 464858,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428979.332, "dur": 0.544, + "args": { + "External id": 464859,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453428989.803, "dur": 34.400, + "args": { + "External id": 464860,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428991.962, "dur": 0.750, + "args": { + "External id": 464861,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428994.423, "dur": 3.217, + "args": { + "External id": 464862,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453428998.964, "dur": 0.640, + "args": { + "External id": 464863,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453429001.441, "dur": 2.600, + "args": { + "External id": 464864,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453429007.818, "dur": 0.600, + "args": { + "External id": 464865,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453429009.614, "dur": 0.640, + "args": { + "External id": 464866,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453429011.471, "dur": 0.462, + "args": { + "External id": 464867,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453429015.765, "dur": 0.530, + "args": { + "External id": 464868,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453429017.845, "dur": 0.377, + "args": { + "External id": 464869,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453429037.575, "dur": 83.318, + "args": { + "External id": 464870,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453429201.593, "dur": 432.527, + "args": { + "External id": 464871,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453429238.380, "dur": 389.041, + "args": { + "External id": 464872,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17964, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453429250.460, "dur": 368.006, + "args": { + "External id": 464873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453429664.641, "dur": 3.621, + "args": { + "External id": 464874,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17966, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338707, "tid": 2338707, + "ts": 6267453429778.770, "dur": 31463.730, + "args": { + "External id": 464875,"Record function id": 0, "Ev Idx": 17967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453429896.082, "dur": 7.425, + "args": { + "External id": 464876,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453429907.173, "dur": 4.403, + "args": { + "External id": 464877,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453429913.480, "dur": 3.953, + "args": { + "External id": 464878,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453429919.380, "dur": 1.216, + "args": { + "External id": 464879,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453429922.252, "dur": 1.182, + "args": { + "External id": 464880,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453429924.936, "dur": 1.277, + "args": { + "External id": 464881,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453429928.308, "dur": 1.332, + "args": { + "External id": 464882,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453429931.481, "dur": 2.436, + "args": { + "External id": 464883,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453429935.800, "dur": 0.948, + "args": { + "External id": 464884,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453429940.399, "dur": 0.664, + "args": { + "External id": 464885,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453429961.847, "dur": 31226.185, + "args": { + "External id": 464886,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453429979.367, "dur": 31198.284, + "args": { + "External id": 464887,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453429996.463, "dur": 19.825, + "args": { + "External id": 464888,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453430020.664, "dur": 31108.039, + "args": { + "External id": 464889,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453430023.665, "dur": 31103.254, + "args": { + "External id": 464890,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453430033.724, "dur": 6.591, + "args": { + "External id": 464891,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453430042.235, "dur": 31080.993, + "args": { + "External id": 464892,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453461418.407, "dur": 46.334, + "args": { + "External id": 464893,"Sequence number": 5036632, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17985 + } + }, + { + "ph": "s", "id": 411, "pid": 2338707, "tid": 2338707, "ts": 6267453461418.407, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453461445.908, "dur": 12.213, + "args": { + "External id": 464894,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453461451.298, "dur": 6.576, + "args": { + "External id": 464895,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453461566.166, "dur": 105.813, + "args": { + "External id": 464896,"Record function id": 0, "Ev Idx": 17988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453461720.764, "dur": 1453.764, + "args": { + "External id": 464897,"Record function id": 0, "Ev Idx": 17989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453461775.018, "dur": 1381.476, + "args": { + "External id": 464898,"Sequence number": 5036633, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17990 + } + }, + { + "ph": "s", "id": 410, "pid": 2338707, "tid": 2338707, "ts": 6267453461775.018, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453461870.369, "dur": 65.825, + "args": { + "External id": 464899,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453461955.539, "dur": 165.129, + "args": { + "External id": 464900,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453462146.158, "dur": 55.930, + "args": { + "External id": 464901,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453462210.719, "dur": 36.043, + "args": { + "External id": 464902,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453462283.341, "dur": 38.012, + "args": { + "External id": 464903,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453462347.816, "dur": 22.961, + "args": { + "External id": 464904,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453462400.681, "dur": 164.534, + "args": { + "External id": 464905,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453462462.257, "dur": 14.977, + "args": { + "External id": 464906,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453462469.460, "dur": 6.750, + "args": { + "External id": 464907,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453462481.305, "dur": 4.744, + "args": { + "External id": 464908,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453462487.837, "dur": 1.023, + "args": { + "External id": 464909,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453462493.972, "dur": 6.444, + "args": { + "External id": 464910,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453462578.860, "dur": 87.596, + "args": { + "External id": 464911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453462712.949, "dur": 37.596, + "args": { + "External id": 464912,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453462762.070, "dur": 55.391, + "args": { + "External id": 464913,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453462829.465, "dur": 41.811, + "args": { + "External id": 464914,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453462900.232, "dur": 33.836, + "args": { + "External id": 464915,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453462940.762, "dur": 42.178, + "args": { + "External id": 464916,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453463005.463, "dur": 25.045, + "args": { + "External id": 464917,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18009 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2338707, "tid": 2338707, + "ts": 6267453463252.145, "dur": 94.572, + "args": { + "External id": 464918,"Record function id": 0, "Ev Idx": 18010 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453463434.781, "dur": 55.119, + "args": { + "External id": 464919,"Record function id": 0, "Ev Idx": 18011 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2338707, "tid": 2338707, + "ts": 6267453463499.873, "dur": 33527.521, + "args": { + "External id": 464920,"Record function id": 0, "Ev Idx": 18012 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338707, "tid": 2338707, + "ts": 6267453463508.710, "dur": 1197.865, + "args": { + "External id": 464921,"Record function id": 0, "Ev Idx": 18013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453463621.146, "dur": 12.906, + "args": { + "External id": 464922,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453463652.793, "dur": 45.717, + "args": { + "External id": 464923,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463659.450, "dur": 2.766, + "args": { + "External id": 464924,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463667.829, "dur": 0.520, + "args": { + "External id": 464925,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463670.211, "dur": 0.657, + "args": { + "External id": 464926,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463672.023, "dur": 0.747, + "args": { + "External id": 464927,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463677.155, "dur": 0.592, + "args": { + "External id": 464928,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463678.993, "dur": 0.487, + "args": { + "External id": 464929,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463681.101, "dur": 5.091, + "args": { + "External id": 464930,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463687.830, "dur": 0.603, + "args": { + "External id": 464931,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463690.390, "dur": 0.301, + "args": { + "External id": 464932,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453463713.257, "dur": 68.627, + "args": { + "External id": 464933,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453463827.996, "dur": 161.968, + "args": { + "External id": 464934,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453463842.402, "dur": 4.823, + "args": { + "External id": 464935,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453463860.745, "dur": 13.615, + "args": { + "External id": 464936,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453463866.066, "dur": 7.760, + "args": { + "External id": 464937,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463871.637, "dur": 0.571, + "args": { + "External id": 464938,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453463885.995, "dur": 35.102, + "args": { + "External id": 464939,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463888.541, "dur": 2.944, + "args": { + "External id": 464940,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463893.681, "dur": 0.748, + "args": { + "External id": 464941,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463896.144, "dur": 0.657, + "args": { + "External id": 464942,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463901.247, "dur": 2.413, + "args": { + "External id": 464943,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463904.872, "dur": 0.412, + "args": { + "External id": 464944,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463907.119, "dur": 0.331, + "args": { + "External id": 464945,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463908.931, "dur": 0.693, + "args": { + "External id": 464946,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463911.101, "dur": 0.473, + "args": { + "External id": 464947,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453463913.181, "dur": 2.850, + "args": { + "External id": 464948,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453463939.302, "dur": 41.036, + "args": { + "External id": 464949,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453464055.305, "dur": 509.116, + "args": { + "External id": 464950,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453464133.509, "dur": 424.428, + "args": { + "External id": 464951,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18043, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453464148.927, "dur": 402.560, + "args": { + "External id": 464952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453464609.888, "dur": 4.125, + "args": { + "External id": 464953,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18045, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338707, "tid": 2338707, + "ts": 6267453464733.364, "dur": 32020.967, + "args": { + "External id": 464954,"Record function id": 0, "Ev Idx": 18046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453464853.393, "dur": 8.199, + "args": { + "External id": 464955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453464865.991, "dur": 1.309, + "args": { + "External id": 464956,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453464869.726, "dur": 3.566, + "args": { + "External id": 464957,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453464875.212, "dur": 1.339, + "args": { + "External id": 464958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453464878.163, "dur": 1.268, + "args": { + "External id": 464959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453464880.815, "dur": 1.402, + "args": { + "External id": 464960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453464884.045, "dur": 1.102, + "args": { + "External id": 464961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453464887.946, "dur": 2.753, + "args": { + "External id": 464962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453464892.033, "dur": 0.871, + "args": { + "External id": 464963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453464897.144, "dur": 0.919, + "args": { + "External id": 464964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453464920.194, "dur": 31767.693, + "args": { + "External id": 464965,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453464938.922, "dur": 31736.329, + "args": { + "External id": 464966,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453464958.854, "dur": 18.999, + "args": { + "External id": 464967,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453464982.193, "dur": 31641.551, + "args": { + "External id": 464968,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453464985.020, "dur": 31636.977, + "args": { + "External id": 464969,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453464991.903, "dur": 6.564, + "args": { + "External id": 464970,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453465000.377, "dur": 31617.434, + "args": { + "External id": 464971,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453496943.774, "dur": 45.940, + "args": { + "External id": 464972,"Sequence number": 5036634, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18064 + } + }, + { + "ph": "s", "id": 409, "pid": 2338707, "tid": 2338707, "ts": 6267453496943.774, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453496969.161, "dur": 13.708, + "args": { + "External id": 464973,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453496975.054, "dur": 7.491, + "args": { + "External id": 464974,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453497122.491, "dur": 88.865, + "args": { + "External id": 464975,"Record function id": 0, "Ev Idx": 18067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453497213.967, "dur": 1421.144, + "args": { + "External id": 464976,"Record function id": 0, "Ev Idx": 18068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453497267.943, "dur": 1349.358, + "args": { + "External id": 464977,"Sequence number": 5036635, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18069 + } + }, + { + "ph": "s", "id": 408, "pid": 2338707, "tid": 2338707, "ts": 6267453497267.943, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453497358.255, "dur": 66.383, + "args": { + "External id": 464978,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453497442.239, "dur": 124.288, + "args": { + "External id": 464979,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453497582.677, "dur": 67.034, + "args": { + "External id": 464980,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453497665.580, "dur": 41.412, + "args": { + "External id": 464981,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453497741.591, "dur": 35.910, + "args": { + "External id": 464982,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453497810.145, "dur": 21.976, + "args": { + "External id": 464983,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453497863.688, "dur": 166.133, + "args": { + "External id": 464984,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453497926.233, "dur": 15.415, + "args": { + "External id": 464985,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453497933.274, "dur": 7.267, + "args": { + "External id": 464986,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453497945.918, "dur": 4.790, + "args": { + "External id": 464987,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453497952.140, "dur": 1.361, + "args": { + "External id": 464988,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453497956.723, "dur": 7.057, + "args": { + "External id": 464989,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453498043.982, "dur": 110.849, + "args": { + "External id": 464990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453498202.563, "dur": 38.196, + "args": { + "External id": 464991,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453498251.855, "dur": 57.688, + "args": { + "External id": 464992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453498323.619, "dur": 42.609, + "args": { + "External id": 464993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453498395.223, "dur": 34.835, + "args": { + "External id": 464994,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453498438.878, "dur": 42.001, + "args": { + "External id": 464995,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453498501.683, "dur": 21.009, + "args": { + "External id": 464996,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18088 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2338707, "tid": 2338707, + "ts": 6267453498713.121, "dur": 98.093, + "args": { + "External id": 464997,"Record function id": 0, "Ev Idx": 18089 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453498904.005, "dur": 58.466, + "args": { + "External id": 464998,"Record function id": 0, "Ev Idx": 18090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2338707, "tid": 2338707, + "ts": 6267453498973.556, "dur": 32515.867, + "args": { + "External id": 464999,"Record function id": 0, "Ev Idx": 18091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338707, "tid": 2338707, + "ts": 6267453498983.853, "dur": 1160.327, + "args": { + "External id": 465000,"Record function id": 0, "Ev Idx": 18092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453499121.271, "dur": 13.013, + "args": { + "External id": 465001,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453499152.325, "dur": 47.012, + "args": { + "External id": 465002,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499159.691, "dur": 2.927, + "args": { + "External id": 465003,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499167.632, "dur": 0.542, + "args": { + "External id": 465004,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499170.253, "dur": 0.415, + "args": { + "External id": 465005,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499172.253, "dur": 0.548, + "args": { + "External id": 465006,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499176.839, "dur": 0.733, + "args": { + "External id": 465007,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499179.396, "dur": 0.634, + "args": { + "External id": 465008,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499181.395, "dur": 5.650, + "args": { + "External id": 465009,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499188.311, "dur": 0.603, + "args": { + "External id": 465010,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499190.765, "dur": 0.703, + "args": { + "External id": 465011,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453499214.627, "dur": 68.094, + "args": { + "External id": 465012,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453499324.159, "dur": 145.911, + "args": { + "External id": 465013,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453499338.226, "dur": 5.115, + "args": { + "External id": 465014,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453499349.272, "dur": 14.292, + "args": { + "External id": 465015,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453499354.450, "dur": 8.622, + "args": { + "External id": 465016,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499360.759, "dur": 0.876, + "args": { + "External id": 465017,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453499371.786, "dur": 37.218, + "args": { + "External id": 465018,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499374.007, "dur": 2.808, + "args": { + "External id": 465019,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499378.931, "dur": 0.424, + "args": { + "External id": 465020,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499380.800, "dur": 0.559, + "args": { + "External id": 465021,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499385.689, "dur": 2.688, + "args": { + "External id": 465022,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499389.580, "dur": 0.517, + "args": { + "External id": 465023,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499391.782, "dur": 0.632, + "args": { + "External id": 465024,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499395.841, "dur": 0.424, + "args": { + "External id": 465025,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499398.397, "dur": 0.401, + "args": { + "External id": 465026,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453499400.016, "dur": 2.908, + "args": { + "External id": 465027,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453499422.951, "dur": 37.185, + "args": { + "External id": 465028,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453499534.619, "dur": 450.629, + "args": { + "External id": 465029,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453499571.414, "dur": 407.353, + "args": { + "External id": 465030,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18122, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453499586.693, "dur": 385.026, + "args": { + "External id": 465031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453500015.403, "dur": 3.123, + "args": { + "External id": 465032,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18124, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338707, "tid": 2338707, + "ts": 6267453500172.283, "dur": 31066.557, + "args": { + "External id": 465033,"Record function id": 0, "Ev Idx": 18125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453500292.504, "dur": 7.886, + "args": { + "External id": 465034,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453500304.334, "dur": 1.323, + "args": { + "External id": 465035,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453500307.773, "dur": 3.756, + "args": { + "External id": 465036,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453500313.495, "dur": 1.064, + "args": { + "External id": 465037,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453500316.070, "dur": 1.036, + "args": { + "External id": 465038,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453500318.394, "dur": 1.229, + "args": { + "External id": 465039,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453500321.509, "dur": 1.452, + "args": { + "External id": 465040,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453500324.724, "dur": 2.522, + "args": { + "External id": 465041,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453500328.956, "dur": 0.948, + "args": { + "External id": 465042,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453500333.666, "dur": 0.976, + "args": { + "External id": 465043,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453500355.581, "dur": 30829.125, + "args": { + "External id": 465044,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453500373.365, "dur": 30800.861, + "args": { + "External id": 465045,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453500390.583, "dur": 19.296, + "args": { + "External id": 465046,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453500413.960, "dur": 30717.042, + "args": { + "External id": 465047,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453500417.369, "dur": 30711.917, + "args": { + "External id": 465048,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453500426.823, "dur": 6.197, + "args": { + "External id": 465049,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453500434.912, "dur": 30690.650, + "args": { + "External id": 465050,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453531411.617, "dur": 42.012, + "args": { + "External id": 465051,"Sequence number": 5036636, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18143 + } + }, + { + "ph": "s", "id": 407, "pid": 2338707, "tid": 2338707, "ts": 6267453531411.617, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453531434.877, "dur": 12.060, + "args": { + "External id": 465052,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453531440.404, "dur": 6.300, + "args": { + "External id": 465053,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453531542.152, "dur": 107.883, + "args": { + "External id": 465054,"Record function id": 0, "Ev Idx": 18146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453531653.098, "dur": 1387.217, + "args": { + "External id": 465055,"Record function id": 0, "Ev Idx": 18147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453531699.706, "dur": 1324.191, + "args": { + "External id": 465056,"Sequence number": 5036637, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18148 + } + }, + { + "ph": "s", "id": 406, "pid": 2338707, "tid": 2338707, "ts": 6267453531699.706, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453531786.031, "dur": 63.249, + "args": { + "External id": 465057,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453531864.853, "dur": 123.986, + "args": { + "External id": 465058,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453532005.724, "dur": 45.549, + "args": { + "External id": 465059,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453532110.295, "dur": 45.504, + "args": { + "External id": 465060,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453532194.059, "dur": 36.572, + "args": { + "External id": 465061,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453532259.860, "dur": 21.808, + "args": { + "External id": 465062,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453532310.349, "dur": 165.551, + "args": { + "External id": 465063,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453532374.655, "dur": 15.525, + "args": { + "External id": 465064,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453532382.053, "dur": 7.122, + "args": { + "External id": 465065,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453532394.256, "dur": 5.083, + "args": { + "External id": 465066,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453532401.101, "dur": 1.196, + "args": { + "External id": 465067,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453532405.077, "dur": 5.885, + "args": { + "External id": 465068,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453532489.473, "dur": 66.256, + "args": { + "External id": 465069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453532608.160, "dur": 45.701, + "args": { + "External id": 465070,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453532668.094, "dur": 59.804, + "args": { + "External id": 465071,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453532742.034, "dur": 41.900, + "args": { + "External id": 465072,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453532812.649, "dur": 32.493, + "args": { + "External id": 465073,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453532854.874, "dur": 46.505, + "args": { + "External id": 465074,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453532922.207, "dur": 23.726, + "args": { + "External id": 465075,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18167 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2338707, "tid": 2338707, + "ts": 6267453533159.792, "dur": 92.784, + "args": { + "External id": 465076,"Record function id": 0, "Ev Idx": 18168 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453533343.890, "dur": 56.781, + "args": { + "External id": 465077,"Record function id": 0, "Ev Idx": 18169 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2338707, "tid": 2338707, + "ts": 6267453533410.585, "dur": 27810.263, + "args": { + "External id": 465078,"Record function id": 0, "Ev Idx": 18170 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338707, "tid": 2338707, + "ts": 6267453533420.122, "dur": 1133.601, + "args": { + "External id": 465079,"Record function id": 0, "Ev Idx": 18171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453533516.715, "dur": 11.722, + "args": { + "External id": 465080,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453533545.123, "dur": 45.602, + "args": { + "External id": 465081,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533552.342, "dur": 2.551, + "args": { + "External id": 465082,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533560.530, "dur": 0.494, + "args": { + "External id": 465083,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533562.996, "dur": 0.509, + "args": { + "External id": 465084,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533564.825, "dur": 0.681, + "args": { + "External id": 465085,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533569.142, "dur": 0.746, + "args": { + "External id": 465086,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533571.037, "dur": 0.711, + "args": { + "External id": 465087,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533573.218, "dur": 5.558, + "args": { + "External id": 465088,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533580.555, "dur": 0.442, + "args": { + "External id": 465089,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533582.713, "dur": 0.343, + "args": { + "External id": 465090,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453533624.776, "dur": 65.493, + "args": { + "External id": 465091,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453533732.783, "dur": 151.120, + "args": { + "External id": 465092,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453533746.490, "dur": 5.517, + "args": { + "External id": 465093,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453533761.593, "dur": 13.277, + "args": { + "External id": 465094,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453533766.877, "dur": 7.526, + "args": { + "External id": 465095,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533771.388, "dur": 1.221, + "args": { + "External id": 465096,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453533782.689, "dur": 38.297, + "args": { + "External id": 465097,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533784.816, "dur": 3.255, + "args": { + "External id": 465098,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533790.093, "dur": 0.540, + "args": { + "External id": 465099,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533792.197, "dur": 0.534, + "args": { + "External id": 465100,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533797.337, "dur": 3.310, + "args": { + "External id": 465101,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533801.719, "dur": 0.284, + "args": { + "External id": 465102,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533803.424, "dur": 0.625, + "args": { + "External id": 465103,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533807.680, "dur": 0.401, + "args": { + "External id": 465104,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533809.867, "dur": 0.380, + "args": { + "External id": 465105,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453533812.052, "dur": 2.526, + "args": { + "External id": 465106,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453533838.117, "dur": 36.544, + "args": { + "External id": 465107,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453533947.129, "dur": 481.770, + "args": { + "External id": 465108,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453533983.017, "dur": 438.988, + "args": { + "External id": 465109,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18201, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453533994.877, "dur": 416.271, + "args": { + "External id": 465110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453534461.076, "dur": 3.229, + "args": { + "External id": 465111,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18203, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338707, "tid": 2338707, + "ts": 6267453534579.667, "dur": 26359.520, + "args": { + "External id": 465112,"Record function id": 0, "Ev Idx": 18204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453534717.003, "dur": 8.128, + "args": { + "External id": 465113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453534729.050, "dur": 1.267, + "args": { + "External id": 465114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453534732.673, "dur": 4.180, + "args": { + "External id": 465115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453534738.815, "dur": 0.789, + "args": { + "External id": 465116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453534741.198, "dur": 1.303, + "args": { + "External id": 465117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453534744.232, "dur": 1.270, + "args": { + "External id": 465118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453534747.092, "dur": 1.255, + "args": { + "External id": 465119,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453534750.392, "dur": 2.466, + "args": { + "External id": 465120,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453534754.507, "dur": 1.096, + "args": { + "External id": 465121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453534760.313, "dur": 0.989, + "args": { + "External id": 465122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453534782.647, "dur": 26102.701, + "args": { + "External id": 465123,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453534800.753, "dur": 26074.162, + "args": { + "External id": 465124,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453534824.484, "dur": 17.716, + "args": { + "External id": 465125,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453534846.284, "dur": 25985.892, + "args": { + "External id": 465126,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453534849.134, "dur": 25981.278, + "args": { + "External id": 465127,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453534856.398, "dur": 6.572, + "args": { + "External id": 465128,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453534864.967, "dur": 25961.575, + "args": { + "External id": 465129,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453561140.018, "dur": 41.633, + "args": { + "External id": 465130,"Sequence number": 5036638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18222 + } + }, + { + "ph": "s", "id": 405, "pid": 2338707, "tid": 2338707, "ts": 6267453561140.018, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453561162.663, "dur": 12.142, + "args": { + "External id": 465131,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453561167.592, "dur": 6.735, + "args": { + "External id": 465132,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453561274.225, "dur": 94.584, + "args": { + "External id": 465133,"Record function id": 0, "Ev Idx": 18225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453561370.622, "dur": 1393.803, + "args": { + "External id": 465134,"Record function id": 0, "Ev Idx": 18226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453561421.584, "dur": 1327.094, + "args": { + "External id": 465135,"Sequence number": 5036639, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18227 + } + }, + { + "ph": "s", "id": 404, "pid": 2338707, "tid": 2338707, "ts": 6267453561421.584, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453561501.808, "dur": 60.654, + "args": { + "External id": 465136,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453561578.781, "dur": 133.592, + "args": { + "External id": 465137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453561733.597, "dur": 47.229, + "args": { + "External id": 465138,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453561793.636, "dur": 35.962, + "args": { + "External id": 465139,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453561864.854, "dur": 39.105, + "args": { + "External id": 465140,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453561933.319, "dur": 25.617, + "args": { + "External id": 465141,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453561988.754, "dur": 208.923, + "args": { + "External id": 465142,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453562048.738, "dur": 58.060, + "args": { + "External id": 465143,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453562095.573, "dur": 8.890, + "args": { + "External id": 465144,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453562109.792, "dur": 5.364, + "args": { + "External id": 465145,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453562116.449, "dur": 1.620, + "args": { + "External id": 465146,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453562121.298, "dur": 6.200, + "args": { + "External id": 465147,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453562217.546, "dur": 69.672, + "args": { + "External id": 465148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453562326.484, "dur": 37.083, + "args": { + "External id": 465149,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453562374.141, "dur": 59.378, + "args": { + "External id": 465150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453562447.871, "dur": 43.316, + "args": { + "External id": 465151,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453562518.993, "dur": 30.224, + "args": { + "External id": 465152,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453562558.161, "dur": 59.438, + "args": { + "External id": 465153,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453562642.624, "dur": 23.161, + "args": { + "External id": 465154,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18246 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2338707, "tid": 2338707, + "ts": 6267453562839.664, "dur": 89.551, + "args": { + "External id": 465155,"Record function id": 0, "Ev Idx": 18247 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453563019.113, "dur": 104.205, + "args": { + "External id": 465156,"Record function id": 0, "Ev Idx": 18248 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2338707, "tid": 2338707, + "ts": 6267453563136.041, "dur": 30869.925, + "args": { + "External id": 465157,"Record function id": 0, "Ev Idx": 18249 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338707, "tid": 2338707, + "ts": 6267453563148.671, "dur": 1137.313, + "args": { + "External id": 465158,"Record function id": 0, "Ev Idx": 18250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453563246.537, "dur": 12.053, + "args": { + "External id": 465159,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453563276.519, "dur": 46.017, + "args": { + "External id": 465160,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563283.841, "dur": 2.860, + "args": { + "External id": 465161,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563292.254, "dur": 0.239, + "args": { + "External id": 465162,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563294.152, "dur": 0.549, + "args": { + "External id": 465163,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563296.028, "dur": 0.743, + "args": { + "External id": 465164,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563301.008, "dur": 0.277, + "args": { + "External id": 465165,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563303.065, "dur": 0.628, + "args": { + "External id": 465166,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563305.503, "dur": 5.670, + "args": { + "External id": 465167,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563312.552, "dur": 0.464, + "args": { + "External id": 465168,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563314.504, "dur": 0.359, + "args": { + "External id": 465169,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453563335.612, "dur": 68.883, + "args": { + "External id": 465170,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453563443.254, "dur": 147.518, + "args": { + "External id": 465171,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453563457.894, "dur": 5.083, + "args": { + "External id": 465172,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453563469.199, "dur": 12.816, + "args": { + "External id": 465173,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453563474.934, "dur": 6.576, + "args": { + "External id": 465174,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563479.298, "dur": 0.803, + "args": { + "External id": 465175,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453563490.104, "dur": 40.433, + "args": { + "External id": 465176,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563492.508, "dur": 2.730, + "args": { + "External id": 465177,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563497.198, "dur": 0.640, + "args": { + "External id": 465178,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563499.556, "dur": 0.719, + "args": { + "External id": 465179,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563504.612, "dur": 3.195, + "args": { + "External id": 465180,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563511.660, "dur": 0.644, + "args": { + "External id": 465181,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563513.982, "dur": 0.456, + "args": { + "External id": 465182,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563517.932, "dur": 0.682, + "args": { + "External id": 465183,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563520.401, "dur": 0.627, + "args": { + "External id": 465184,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453563522.628, "dur": 2.800, + "args": { + "External id": 465185,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453563543.135, "dur": 38.679, + "args": { + "External id": 465186,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453563679.769, "dur": 483.024, + "args": { + "External id": 465187,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453563717.333, "dur": 438.876, + "args": { + "External id": 465188,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18280, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453563728.704, "dur": 419.987, + "args": { + "External id": 465189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453564198.281, "dur": 3.168, + "args": { + "External id": 465190,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18282, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338707, "tid": 2338707, + "ts": 6267453564311.810, "dur": 29437.541, + "args": { + "External id": 465191,"Record function id": 0, "Ev Idx": 18283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453564422.561, "dur": 7.809, + "args": { + "External id": 465192,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453564434.048, "dur": 1.105, + "args": { + "External id": 465193,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453564437.074, "dur": 3.651, + "args": { + "External id": 465194,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453564442.689, "dur": 1.219, + "args": { + "External id": 465195,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453564445.383, "dur": 0.895, + "args": { + "External id": 465196,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453564447.930, "dur": 1.154, + "args": { + "External id": 465197,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453564451.231, "dur": 1.116, + "args": { + "External id": 465198,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453564454.313, "dur": 2.432, + "args": { + "External id": 465199,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453564458.959, "dur": 0.752, + "args": { + "External id": 465200,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453564463.852, "dur": 0.677, + "args": { + "External id": 465201,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453564484.881, "dur": 29207.751, + "args": { + "External id": 465202,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453564503.203, "dur": 29178.849, + "args": { + "External id": 465203,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453564519.408, "dur": 18.362, + "args": { + "External id": 465204,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453564542.275, "dur": 29096.173, + "args": { + "External id": 465205,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453564545.108, "dur": 29091.562, + "args": { + "External id": 465206,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453564554.778, "dur": 7.116, + "args": { + "External id": 465207,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453564563.719, "dur": 29069.191, + "args": { + "External id": 465208,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453593923.158, "dur": 45.478, + "args": { + "External id": 465209,"Sequence number": 5036640, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18301 + } + }, + { + "ph": "s", "id": 403, "pid": 2338707, "tid": 2338707, "ts": 6267453593923.158, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453593949.265, "dur": 12.610, + "args": { + "External id": 465210,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453593954.751, "dur": 6.752, + "args": { + "External id": 465211,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453594093.228, "dur": 92.805, + "args": { + "External id": 465212,"Record function id": 0, "Ev Idx": 18304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453594188.204, "dur": 1494.907, + "args": { + "External id": 465213,"Record function id": 0, "Ev Idx": 18305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453594238.571, "dur": 1426.853, + "args": { + "External id": 465214,"Sequence number": 5036641, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18306 + } + }, + { + "ph": "s", "id": 402, "pid": 2338707, "tid": 2338707, "ts": 6267453594238.571, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453594329.217, "dur": 65.808, + "args": { + "External id": 465215,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453594413.793, "dur": 121.392, + "args": { + "External id": 465216,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453594551.068, "dur": 65.162, + "args": { + "External id": 465217,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453594633.817, "dur": 43.520, + "args": { + "External id": 465218,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453594711.844, "dur": 33.469, + "args": { + "External id": 465219,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453594772.782, "dur": 21.996, + "args": { + "External id": 465220,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453594822.587, "dur": 160.123, + "args": { + "External id": 465221,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453594883.382, "dur": 16.036, + "args": { + "External id": 465222,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453594890.607, "dur": 7.811, + "args": { + "External id": 465223,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453594903.498, "dur": 4.640, + "args": { + "External id": 465224,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453594909.532, "dur": 1.278, + "args": { + "External id": 465225,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453594913.533, "dur": 6.372, + "args": { + "External id": 465226,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453594995.901, "dur": 104.764, + "args": { + "External id": 465227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453595147.220, "dur": 38.057, + "args": { + "External id": 465228,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453595198.030, "dur": 59.384, + "args": { + "External id": 465229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453595271.111, "dur": 43.208, + "args": { + "External id": 465230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453595342.568, "dur": 33.691, + "args": { + "External id": 465231,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453595385.386, "dur": 43.223, + "args": { + "External id": 465232,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453595530.840, "dur": 30.658, + "args": { + "External id": 465233,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18325 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2338707, "tid": 2338707, + "ts": 6267453595761.115, "dur": 90.639, + "args": { + "External id": 465234,"Record function id": 0, "Ev Idx": 18326 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453595940.169, "dur": 55.943, + "args": { + "External id": 465235,"Record function id": 0, "Ev Idx": 18327 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2338707, "tid": 2338707, + "ts": 6267453596005.859, "dur": 30310.456, + "args": { + "External id": 465236,"Record function id": 0, "Ev Idx": 18328 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338707, "tid": 2338707, + "ts": 6267453596014.241, "dur": 1187.513, + "args": { + "External id": 465237,"Record function id": 0, "Ev Idx": 18329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453596147.928, "dur": 12.807, + "args": { + "External id": 465238,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453596178.084, "dur": 51.571, + "args": { + "External id": 465239,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596185.090, "dur": 2.935, + "args": { + "External id": 465240,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596193.471, "dur": 0.504, + "args": { + "External id": 465241,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596195.960, "dur": 0.706, + "args": { + "External id": 465242,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596198.607, "dur": 0.487, + "args": { + "External id": 465243,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596203.972, "dur": 0.604, + "args": { + "External id": 465244,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596206.147, "dur": 0.657, + "args": { + "External id": 465245,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596210.961, "dur": 3.428, + "args": { + "External id": 465246,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596215.994, "dur": 0.581, + "args": { + "External id": 465247,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596221.747, "dur": 0.431, + "args": { + "External id": 465248,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453596244.831, "dur": 72.876, + "args": { + "External id": 465249,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453596357.235, "dur": 149.515, + "args": { + "External id": 465250,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453596371.630, "dur": 4.743, + "args": { + "External id": 465251,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453596382.972, "dur": 15.974, + "args": { + "External id": 465252,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453596391.808, "dur": 6.675, + "args": { + "External id": 465253,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596396.100, "dur": 0.816, + "args": { + "External id": 465254,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453596406.558, "dur": 36.108, + "args": { + "External id": 465255,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596408.834, "dur": 3.433, + "args": { + "External id": 465256,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596413.969, "dur": 0.276, + "args": { + "External id": 465257,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596415.480, "dur": 0.300, + "args": { + "External id": 465258,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596420.250, "dur": 3.616, + "args": { + "External id": 465259,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596425.552, "dur": 0.408, + "args": { + "External id": 465260,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596427.539, "dur": 0.606, + "args": { + "External id": 465261,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596431.656, "dur": 0.663, + "args": { + "External id": 465262,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596434.144, "dur": 0.450, + "args": { + "External id": 465263,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453596435.736, "dur": 2.419, + "args": { + "External id": 465264,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453596460.114, "dur": 37.294, + "args": { + "External id": 465265,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453596570.075, "dur": 470.015, + "args": { + "External id": 465266,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453596622.720, "dur": 410.202, + "args": { + "External id": 465267,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18359, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453596635.871, "dur": 390.375, + "args": { + "External id": 465268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453597105.411, "dur": 4.519, + "args": { + "External id": 465269,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18361, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338707, "tid": 2338707, + "ts": 6267453597229.546, "dur": 28790.529, + "args": { + "External id": 465270,"Record function id": 0, "Ev Idx": 18362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453597351.331, "dur": 7.782, + "args": { + "External id": 465271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453597363.285, "dur": 1.553, + "args": { + "External id": 465272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453597366.695, "dur": 3.737, + "args": { + "External id": 465273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453597372.574, "dur": 1.071, + "args": { + "External id": 465274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453597378.525, "dur": 1.172, + "args": { + "External id": 465275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453597381.202, "dur": 1.222, + "args": { + "External id": 465276,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453597384.111, "dur": 1.226, + "args": { + "External id": 465277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453597387.685, "dur": 2.442, + "args": { + "External id": 465278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453597391.509, "dur": 0.867, + "args": { + "External id": 465279,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453597396.531, "dur": 1.005, + "args": { + "External id": 465280,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453597418.175, "dur": 28545.568, + "args": { + "External id": 465281,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453597436.108, "dur": 28517.581, + "args": { + "External id": 465282,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453597456.674, "dur": 18.711, + "args": { + "External id": 465283,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453597479.401, "dur": 28430.422, + "args": { + "External id": 465284,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453597482.440, "dur": 28425.526, + "args": { + "External id": 465285,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453597489.715, "dur": 6.254, + "args": { + "External id": 465286,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453597497.789, "dur": 28406.425, + "args": { + "External id": 465287,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453626233.689, "dur": 42.560, + "args": { + "External id": 465288,"Sequence number": 5036642, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18380 + } + }, + { + "ph": "s", "id": 401, "pid": 2338707, "tid": 2338707, "ts": 6267453626233.689, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453626257.034, "dur": 12.248, + "args": { + "External id": 465289,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453626262.365, "dur": 6.445, + "args": { + "External id": 465290,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453626375.241, "dur": 91.598, + "args": { + "External id": 465291,"Record function id": 0, "Ev Idx": 18383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453626468.568, "dur": 1373.403, + "args": { + "External id": 465292,"Record function id": 0, "Ev Idx": 18384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453626513.917, "dur": 1311.634, + "args": { + "External id": 465293,"Sequence number": 5036643, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18385 + } + }, + { + "ph": "s", "id": 400, "pid": 2338707, "tid": 2338707, "ts": 6267453626513.917, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453626614.091, "dur": 61.950, + "args": { + "External id": 465294,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453626697.677, "dur": 122.073, + "args": { + "External id": 465295,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453626835.955, "dur": 45.374, + "args": { + "External id": 465296,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453626892.144, "dur": 35.278, + "args": { + "External id": 465297,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453626958.618, "dur": 33.433, + "args": { + "External id": 465298,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453627020.487, "dur": 19.861, + "args": { + "External id": 465299,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453627108.963, "dur": 162.184, + "args": { + "External id": 465300,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453627169.848, "dur": 16.357, + "args": { + "External id": 465301,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453627176.714, "dur": 8.062, + "args": { + "External id": 465302,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453627190.174, "dur": 4.469, + "args": { + "External id": 465303,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453627196.211, "dur": 0.935, + "args": { + "External id": 465304,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453627200.221, "dur": 5.856, + "args": { + "External id": 465305,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453627286.701, "dur": 66.649, + "args": { + "External id": 465306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453627395.829, "dur": 37.979, + "args": { + "External id": 465307,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453627445.268, "dur": 55.236, + "args": { + "External id": 465308,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453627515.169, "dur": 41.995, + "args": { + "External id": 465309,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453627585.895, "dur": 53.100, + "args": { + "External id": 465310,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453627651.081, "dur": 47.587, + "args": { + "External id": 465311,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453627722.157, "dur": 24.317, + "args": { + "External id": 465312,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18404 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2338707, "tid": 2338707, + "ts": 6267453627917.720, "dur": 87.413, + "args": { + "External id": 465313,"Record function id": 0, "Ev Idx": 18405 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453628138.784, "dur": 59.486, + "args": { + "External id": 465314,"Record function id": 0, "Ev Idx": 18406 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2338707, "tid": 2338707, + "ts": 6267453628208.808, "dur": 30873.690, + "args": { + "External id": 465315,"Record function id": 0, "Ev Idx": 18407 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338707, "tid": 2338707, + "ts": 6267453628218.635, "dur": 1159.295, + "args": { + "External id": 465316,"Record function id": 0, "Ev Idx": 18408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453628312.281, "dur": 11.986, + "args": { + "External id": 465317,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453628341.199, "dur": 45.936, + "args": { + "External id": 465318,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628348.033, "dur": 2.574, + "args": { + "External id": 465319,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628355.723, "dur": 0.536, + "args": { + "External id": 465320,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628357.897, "dur": 0.410, + "args": { + "External id": 465321,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628360.007, "dur": 0.393, + "args": { + "External id": 465322,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628364.440, "dur": 0.476, + "args": { + "External id": 465323,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628366.575, "dur": 0.380, + "args": { + "External id": 465324,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628368.564, "dur": 6.219, + "args": { + "External id": 465325,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628376.227, "dur": 0.433, + "args": { + "External id": 465326,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628378.384, "dur": 0.590, + "args": { + "External id": 465327,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453628400.279, "dur": 65.668, + "args": { + "External id": 465328,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453628505.445, "dur": 166.621, + "args": { + "External id": 465329,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453628520.008, "dur": 5.439, + "args": { + "External id": 465330,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453628531.576, "dur": 12.736, + "args": { + "External id": 465331,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453628537.225, "dur": 6.597, + "args": { + "External id": 465332,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628541.510, "dur": 0.750, + "args": { + "External id": 465333,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453628552.220, "dur": 35.523, + "args": { + "External id": 465334,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628554.385, "dur": 3.251, + "args": { + "External id": 465335,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628559.254, "dur": 0.580, + "args": { + "External id": 465336,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628561.541, "dur": 0.381, + "args": { + "External id": 465337,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628566.160, "dur": 2.658, + "args": { + "External id": 465338,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628570.596, "dur": 0.291, + "args": { + "External id": 465339,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628571.984, "dur": 0.281, + "args": { + "External id": 465340,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628576.627, "dur": 0.456, + "args": { + "External id": 465341,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628578.417, "dur": 0.312, + "args": { + "External id": 465342,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453628580.351, "dur": 2.308, + "args": { + "External id": 465343,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453628623.128, "dur": 38.360, + "args": { + "External id": 465344,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453628736.799, "dur": 517.977, + "args": { + "External id": 465345,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453628773.667, "dur": 473.748, + "args": { + "External id": 465346,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18438, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453628786.687, "dur": 452.927, + "args": { + "External id": 465347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453629287.949, "dur": 3.467, + "args": { + "External id": 465348,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18440, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338707, "tid": 2338707, + "ts": 6267453629403.780, "dur": 29399.581, + "args": { + "External id": 465349,"Record function id": 0, "Ev Idx": 18441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453629513.152, "dur": 7.850, + "args": { + "External id": 465350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453629525.023, "dur": 1.407, + "args": { + "External id": 465351,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453629528.446, "dur": 4.041, + "args": { + "External id": 465352,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453629534.520, "dur": 0.659, + "args": { + "External id": 465353,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453629537.140, "dur": 0.839, + "args": { + "External id": 465354,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453629539.519, "dur": 1.098, + "args": { + "External id": 465355,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453629542.432, "dur": 1.003, + "args": { + "External id": 465356,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453629545.391, "dur": 2.257, + "args": { + "External id": 465357,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453629549.198, "dur": 0.933, + "args": { + "External id": 465358,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453629554.404, "dur": 0.632, + "args": { + "External id": 465359,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453629575.712, "dur": 29174.302, + "args": { + "External id": 465360,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453629611.664, "dur": 29126.796, + "args": { + "External id": 465361,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453629630.772, "dur": 18.476, + "args": { + "External id": 465362,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453629653.601, "dur": 29040.477, + "args": { + "External id": 465363,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453629656.678, "dur": 29035.655, + "args": { + "External id": 465364,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453629666.377, "dur": 7.155, + "args": { + "External id": 465365,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453629675.571, "dur": 29013.216, + "args": { + "External id": 465366,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453658975.118, "dur": 41.236, + "args": { + "External id": 465367,"Sequence number": 5036644, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18459 + } + }, + { + "ph": "s", "id": 399, "pid": 2338707, "tid": 2338707, "ts": 6267453658975.118, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453658997.887, "dur": 11.784, + "args": { + "External id": 465368,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453659003.107, "dur": 6.327, + "args": { + "External id": 465369,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453659143.363, "dur": 87.612, + "args": { + "External id": 465370,"Record function id": 0, "Ev Idx": 18462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453659233.474, "dur": 1426.485, + "args": { + "External id": 465371,"Record function id": 0, "Ev Idx": 18463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453659279.138, "dur": 1361.461, + "args": { + "External id": 465372,"Sequence number": 5036645, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18464 + } + }, + { + "ph": "s", "id": 398, "pid": 2338707, "tid": 2338707, "ts": 6267453659279.138, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453659371.064, "dur": 66.423, + "args": { + "External id": 465373,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453659456.831, "dur": 120.425, + "args": { + "External id": 465374,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453659610.945, "dur": 57.218, + "args": { + "External id": 465375,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453659683.248, "dur": 38.649, + "args": { + "External id": 465376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453659756.084, "dur": 39.873, + "args": { + "External id": 465377,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453659824.181, "dur": 21.092, + "args": { + "External id": 465378,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453659874.179, "dur": 159.332, + "args": { + "External id": 465379,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453659934.253, "dur": 15.412, + "args": { + "External id": 465380,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453659941.547, "dur": 7.096, + "args": { + "External id": 465381,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453659953.990, "dur": 5.014, + "args": { + "External id": 465382,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453659960.701, "dur": 1.277, + "args": { + "External id": 465383,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453659964.804, "dur": 5.818, + "args": { + "External id": 465384,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453660048.197, "dur": 110.645, + "args": { + "External id": 465385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453660204.253, "dur": 38.681, + "args": { + "External id": 465386,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453660254.585, "dur": 55.199, + "args": { + "External id": 465387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453660326.310, "dur": 45.308, + "args": { + "External id": 465388,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453660402.721, "dur": 38.214, + "args": { + "External id": 465389,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453660450.804, "dur": 45.196, + "args": { + "External id": 465390,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453660518.241, "dur": 24.768, + "args": { + "External id": 465391,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18483 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2338707, "tid": 2338707, + "ts": 6267453660739.316, "dur": 93.897, + "args": { + "External id": 465392,"Record function id": 0, "Ev Idx": 18484 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338707, "tid": 2338707, + "ts": 6267453660923.050, "dur": 57.044, + "args": { + "External id": 465393,"Record function id": 0, "Ev Idx": 18485 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2338707, "tid": 2338707, + "ts": 6267453660990.422, "dur": 33140.633, + "args": { + "External id": 465394,"Record function id": 0, "Ev Idx": 18486 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2338707, "tid": 2338707, + "ts": 6267453660999.450, "dur": 1241.240, + "args": { + "External id": 465395,"Record function id": 0, "Ev Idx": 18487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453661143.968, "dur": 12.516, + "args": { + "External id": 465396,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453661174.015, "dur": 44.727, + "args": { + "External id": 465397,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661181.507, "dur": 2.925, + "args": { + "External id": 465398,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661189.424, "dur": 0.384, + "args": { + "External id": 465399,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661191.929, "dur": 0.743, + "args": { + "External id": 465400,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661193.714, "dur": 0.418, + "args": { + "External id": 465401,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661198.313, "dur": 0.565, + "args": { + "External id": 465402,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661200.267, "dur": 0.503, + "args": { + "External id": 465403,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661202.739, "dur": 4.962, + "args": { + "External id": 465404,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661209.251, "dur": 0.608, + "args": { + "External id": 465405,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661211.146, "dur": 0.686, + "args": { + "External id": 465406,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453661233.397, "dur": 65.004, + "args": { + "External id": 465407,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338707, "tid": 2338707, + "ts": 6267453661338.824, "dur": 191.233, + "args": { + "External id": 465408,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "1", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453661394.184, "dur": 9.338, + "args": { + "External id": 465409,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338707, "tid": 2338707, + "ts": 6267453661409.608, "dur": 12.155, + "args": { + "External id": 465410,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453661414.672, "dur": 6.608, + "args": { + "External id": 465411,"Record function id": 0, "Concrete Inputs": ["", "0", "27264000", "54528000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661418.854, "dur": 0.587, + "args": { + "External id": 465412,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338707, "tid": 2338707, + "ts": 6267453661429.941, "dur": 37.079, + "args": { + "External id": 465413,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661439.124, "dur": 0.435, + "args": { + "External id": 465414,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "27264000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661444.526, "dur": 0.376, + "args": { + "External id": 465415,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "27264512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661446.130, "dur": 0.393, + "args": { + "External id": 465416,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29361664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661448.315, "dur": 2.975, + "args": { + "External id": 465417,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29885952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661452.539, "dur": 0.371, + "args": { + "External id": 465418,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "30410240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661454.376, "dur": 0.269, + "args": { + "External id": 465419,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "32507392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661458.375, "dur": 0.423, + "args": { + "External id": 465420,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "32507904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661460.461, "dur": 0.328, + "args": { + "External id": 465421,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "39847936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453661461.941, "dur": 0.383, + "args": { + "External id": 465422,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "47187968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453661483.015, "dur": 36.660, + "args": { + "External id": 465423,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338707, "tid": 2338707, + "ts": 6267453661614.125, "dur": 504.922, + "args": { + "External id": 465424,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453661651.337, "dur": 460.779, + "args": { + "External id": 465425,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 1, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18517, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338707, "tid": 2338707, + "ts": 6267453661664.553, "dur": 436.523, + "args": { + "External id": 465426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267453662151.005, "dur": 3.360, + "args": { + "External id": 465427,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18519, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2338707, "tid": 2338707, + "ts": 6267453662266.027, "dur": 31586.173, + "args": { + "External id": 465428,"Record function id": 0, "Ev Idx": 18520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453662381.889, "dur": 7.922, + "args": { + "External id": 465429,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453662393.920, "dur": 1.284, + "args": { + "External id": 465430,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453662397.160, "dur": 3.806, + "args": { + "External id": 465431,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453662402.675, "dur": 0.891, + "args": { + "External id": 465432,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453662404.984, "dur": 1.271, + "args": { + "External id": 465433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453662407.775, "dur": 1.159, + "args": { + "External id": 465434,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453662410.857, "dur": 0.792, + "args": { + "External id": 465435,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453662413.790, "dur": 2.381, + "args": { + "External id": 465436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453662417.906, "dur": 0.914, + "args": { + "External id": 465437,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453662422.944, "dur": 0.762, + "args": { + "External id": 465438,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453662444.006, "dur": 31354.941, + "args": { + "External id": 465439,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453662461.697, "dur": 31326.841, + "args": { + "External id": 465440,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453662482.024, "dur": 20.987, + "args": { + "External id": 465441,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453662507.453, "dur": 31237.809, + "args": { + "External id": 465442,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453662511.214, "dur": 31232.305, + "args": { + "External id": 465443,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453662517.752, "dur": 6.840, + "args": { + "External id": 465444,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453662526.528, "dur": 31213.454, + "args": { + "External id": 465445,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453694027.634, "dur": 69.057, + "args": { + "External id": 465446,"Sequence number": 5036646, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18538 + } + }, + { + "ph": "s", "id": 397, "pid": 2338707, "tid": 2338707, "ts": 6267453694027.634, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453694047.948, "dur": 42.279, + "args": { + "External id": 465447,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453694082.726, "dur": 6.961, + "args": { + "External id": 465448,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453694180.308, "dur": 89.515, + "args": { + "External id": 465449,"Record function id": 0, "Ev Idx": 18541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338707, "tid": 2338707, + "ts": 6267453694272.308, "dur": 1363.297, + "args": { + "External id": 465450,"Record function id": 0, "Ev Idx": 18542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453694315.312, "dur": 1300.879, + "args": { + "External id": 465451,"Sequence number": 5036647, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18543 + } + }, + { + "ph": "s", "id": 396, "pid": 2338707, "tid": 2338707, "ts": 6267453694315.312, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453694399.787, "dur": 63.823, + "args": { + "External id": 465452,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453694480.923, "dur": 135.032, + "args": { + "External id": 465453,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453694635.054, "dur": 51.724, + "args": { + "External id": 465454,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453694697.347, "dur": 34.663, + "args": { + "External id": 465455,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453694763.927, "dur": 31.902, + "args": { + "External id": 465456,"kernel_hash": "cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/jw/cjwvznu7opd4dgjdmuzqwvdt6lhviut6sieddl3tkktzzxofvzt5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338707, "tid": 2338707, + "ts": 6267453694820.492, "dur": 20.703, + "args": { + "External id": 465457,"kernel_hash": "cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xw/cxwsdhjjh6geaxjsibzwiye546vfxkikrsjksa55ws62s4qjbrgi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453694869.263, "dur": 161.344, + "args": { + "External id": 465458,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453694929.671, "dur": 16.906, + "args": { + "External id": 465459,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453694937.101, "dur": 8.451, + "args": { + "External id": 465460,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453694951.026, "dur": 4.480, + "args": { + "External id": 465461,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453694957.047, "dur": 1.203, + "args": { + "External id": 465462,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453694961.102, "dur": 5.726, + "args": { + "External id": 465463,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453695042.684, "dur": 108.398, + "args": { + "External id": 465464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338707, "tid": 2338707, + "ts": 6267453695193.356, "dur": 39.581, + "args": { + "External id": 465465,"kernel_hash": "cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/dm/cdmhy2hmlhgvjhiufswlbupaei6azjf5dk6opbfvxrcm5fprtg43.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453695244.124, "dur": 54.565, + "args": { + "External id": 465466,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453695309.065, "dur": 41.731, + "args": { + "External id": 465467,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453695379.455, "dur": 33.166, + "args": { + "External id": 465468,"kernel_hash": "cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/b7/cb7afso2qkvt3i6pzg4vijqlu6jjlgwbjzzyh4oznhdt75ykajyu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453695421.852, "dur": 45.202, + "args": { + "External id": 465469,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338707, "tid": 2338707, + "ts": 6267453695488.667, "dur": 23.040, + "args": { + "External id": 465470,"kernel_hash": "cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/gi/cgik4ktlpri5iod5s5ynrzau3qlffebgfpe6sdr2lepxbydsdryb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18562 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2338707, "tid": 2338707, + "ts": 6267453695714.828, "dur": 43.151, + "args": { + "External id": 465471,"Record function id": 0, "Ev Idx": 18563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453695929.360, "dur": 387.334, + "args": { + "External id": 465472,"Sequence number": 5036648, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18564 + } + }, + { + "ph": "s", "id": 395, "pid": 2338707, "tid": 2338707, "ts": 6267453695929.360, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453695966.582, "dur": 9.725, + "args": { + "External id": 465473,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453695969.149, "dur": 6.874, + "args": { + "External id": 465474,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453695989.038, "dur": 16.658, + "args": { + "External id": 465475,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453695993.478, "dur": 11.540, + "args": { + "External id": 465476,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453696015.332, "dur": 5.381, + "args": { + "External id": 465477,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453696289.229, "dur": 10.279, + "args": { + "External id": 465478,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453696293.723, "dur": 5.184, + "args": { + "External id": 465479,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453696350.007, "dur": 156.605, + "args": { + "External id": 465480,"Sequence number": 5036649, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453696352.473, "dur": 14.461, + "args": { + "External id": 465481,"Sequence number": 5036649, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18573 + } + }, + { + "ph": "s", "id": 394, "pid": 2338707, "tid": 2338707, "ts": 6267453696352.473, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453696357.838, "dur": 6.663, + "args": { + "External id": 465482,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453696361.521, "dur": 2.591, + "args": { + "External id": 465483,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453696369.017, "dur": 137.154, + "args": { + "External id": 465484,"Sequence number": 5036650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453696374.423, "dur": 6.266, + "args": { + "External id": 465485,"Sequence number": 5036650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453696375.440, "dur": 5.072, + "args": { + "External id": 465486,"Sequence number": 5036650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18578 + } + }, + { + "ph": "s", "id": 393, "pid": 2338707, "tid": 2338707, "ts": 6267453696375.440, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453696382.482, "dur": 111.063, + "args": { + "External id": 465487,"Sequence number": 5036651, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18579 + } + }, + { + "ph": "s", "id": 392, "pid": 2338707, "tid": 2338707, "ts": 6267453696382.482, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453696497.719, "dur": 7.292, + "args": { + "External id": 465488,"Sequence number": 5036652, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18580 + } + }, + { + "ph": "s", "id": 391, "pid": 2338707, "tid": 2338707, "ts": 6267453696497.719, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453696519.889, "dur": 101.296, + "args": { + "External id": 465489,"Sequence number": 5036653, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453696520.951, "dur": 7.387, + "args": { + "External id": 465490,"Sequence number": 5036653, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18582 + } + }, + { + "ph": "s", "id": 390, "pid": 2338707, "tid": 2338707, "ts": 6267453696520.951, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453696523.023, "dur": 3.805, + "args": { + "External id": 465491,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453696525.479, "dur": 1.181, + "args": { + "External id": 465492,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453696529.275, "dur": 91.464, + "args": { + "External id": 465493,"Sequence number": 5036654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453696532.841, "dur": 6.007, + "args": { + "External id": 465494,"Sequence number": 5036654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453696533.961, "dur": 4.719, + "args": { + "External id": 465495,"Sequence number": 5036654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18587 + } + }, + { + "ph": "s", "id": 389, "pid": 2338707, "tid": 2338707, "ts": 6267453696533.961, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453696539.981, "dur": 51.207, + "args": { + "External id": 465496,"Sequence number": 5036655, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18588 + } + }, + { + "ph": "s", "id": 388, "pid": 2338707, "tid": 2338707, "ts": 6267453696539.981, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453696613.608, "dur": 6.384, + "args": { + "External id": 465497,"Sequence number": 5036656, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18589 + } + }, + { + "ph": "s", "id": 387, "pid": 2338707, "tid": 2338707, "ts": 6267453696613.608, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453696632.988, "dur": 84.197, + "args": { + "External id": 465498,"Sequence number": 5036657, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453696636.048, "dur": 7.810, + "args": { + "External id": 465499,"Sequence number": 5036657, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18591 + } + }, + { + "ph": "s", "id": 386, "pid": 2338707, "tid": 2338707, "ts": 6267453696636.048, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453696638.402, "dur": 3.930, + "args": { + "External id": 465500,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453696640.707, "dur": 1.198, + "args": { + "External id": 465501,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453696644.963, "dur": 71.839, + "args": { + "External id": 465502,"Sequence number": 5036658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453696646.474, "dur": 8.804, + "args": { + "External id": 465503,"Sequence number": 5036658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453696649.483, "dur": 5.628, + "args": { + "External id": 465504,"Sequence number": 5036658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18596 + } + }, + { + "ph": "s", "id": 385, "pid": 2338707, "tid": 2338707, "ts": 6267453696649.483, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453696655.963, "dur": 52.692, + "args": { + "External id": 465505,"Sequence number": 5036659, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18597 + } + }, + { + "ph": "s", "id": 384, "pid": 2338707, "tid": 2338707, "ts": 6267453696655.963, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453696710.888, "dur": 5.434, + "args": { + "External id": 465506,"Sequence number": 5036660, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18598 + } + }, + { + "ph": "s", "id": 383, "pid": 2338707, "tid": 2338707, "ts": 6267453696710.888, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453696748.649, "dur": 7.600, + "args": { + "External id": 465507,"Sequence number": 5036661, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453696752.183, "dur": 3.936, + "args": { + "External id": 465508,"Sequence number": 5036661, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18600 + } + }, + { + "ph": "s", "id": 382, "pid": 2338707, "tid": 2338707, "ts": 6267453696752.183, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453696767.343, "dur": 7.017, + "args": { + "External id": 465509,"Sequence number": 5036662, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453696768.928, "dur": 5.290, + "args": { + "External id": 465510,"Sequence number": 5036662, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18602 + } + }, + { + "ph": "s", "id": 381, "pid": 2338707, "tid": 2338707, "ts": 6267453696768.928, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453696779.674, "dur": 5.792, + "args": { + "External id": 465511,"Sequence number": 5036663, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453696781.161, "dur": 4.131, + "args": { + "External id": 465512,"Sequence number": 5036663, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18604 + } + }, + { + "ph": "s", "id": 380, "pid": 2338707, "tid": 2338707, "ts": 6267453696781.161, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453696833.275, "dur": 207.795, + "args": { + "External id": 465513,"Sequence number": 5036664, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18605 + } + }, + { + "ph": "s", "id": 379, "pid": 2338707, "tid": 2338707, "ts": 6267453696833.275, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453696860.849, "dur": 10.527, + "args": { + "External id": 465514,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453696864.755, "dur": 6.129, + "args": { + "External id": 465515,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453697096.227, "dur": 153.756, + "args": { + "External id": 465516,"Sequence number": 5036665, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18608 + } + }, + { + "ph": "s", "id": 378, "pid": 2338707, "tid": 2338707, "ts": 6267453697096.227, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453697118.248, "dur": 10.872, + "args": { + "External id": 465517,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453697121.538, "dur": 6.908, + "args": { + "External id": 465518,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338707, "tid": 2338707, + "ts": 6267453697291.842, "dur": 215.763, + "args": { + "External id": 465519,"Sequence number": 5036666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18611 + } + }, + { + "ph": "s", "id": 377, "pid": 2338707, "tid": 2338707, "ts": 6267453697291.842, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453697329.581, "dur": 146.862, + "args": { + "External id": 465520,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453697385.560, "dur": 12.355, + "args": { + "External id": 465521,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453697388.748, "dur": 8.588, + "args": { + "External id": 465522,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453697400.938, "dur": 4.497, + "args": { + "External id": 465523,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453697409.686, "dur": 1.218, + "args": { + "External id": 465524,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453697413.917, "dur": 4.591, + "args": { + "External id": 465525,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2338707, + "ts": 6267453697490.999, "dur": 6.002, + "args": { + "External id": 465526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453697514.600, "dur": 7.653, + "args": { + "External id": 465527,"Sequence number": 5036667, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453697516.624, "dur": 5.434, + "args": { + "External id": 465528,"Sequence number": 5036667, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18620 + } + }, + { + "ph": "s", "id": 376, "pid": 2338707, "tid": 2338707, "ts": 6267453697516.624, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453697539.238, "dur": 165.405, + "args": { + "External id": 465529,"Sequence number": 5036668, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453697540.912, "dur": 12.308, + "args": { + "External id": 465530,"Sequence number": 5036668, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18622 + } + }, + { + "ph": "s", "id": 375, "pid": 2338707, "tid": 2338707, "ts": 6267453697540.912, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453697546.515, "dur": 5.469, + "args": { + "External id": 465531,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453697549.535, "dur": 2.093, + "args": { + "External id": 465532,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453697554.887, "dur": 149.283, + "args": { + "External id": 465533,"Sequence number": 5036669, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453697557.398, "dur": 6.540, + "args": { + "External id": 465534,"Sequence number": 5036669, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453697558.664, "dur": 5.113, + "args": { + "External id": 465535,"Sequence number": 5036669, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18627 + } + }, + { + "ph": "s", "id": 374, "pid": 2338707, "tid": 2338707, "ts": 6267453697558.664, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453697565.039, "dur": 128.707, + "args": { + "External id": 465536,"Sequence number": 5036670, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18628 + } + }, + { + "ph": "s", "id": 373, "pid": 2338707, "tid": 2338707, "ts": 6267453697565.039, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453697698.340, "dur": 4.679, + "args": { + "External id": 465537,"Sequence number": 5036671, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18629 + } + }, + { + "ph": "s", "id": 372, "pid": 2338707, "tid": 2338707, "ts": 6267453697698.340, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453697752.109, "dur": 290.583, + "args": { + "External id": 465538,"Sequence number": 5036672, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18630 + } + }, + { + "ph": "s", "id": 371, "pid": 2338707, "tid": 2338707, "ts": 6267453697752.109, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453697782.252, "dur": 8.321, + "args": { + "External id": 465539,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453697786.067, "dur": 4.333, + "args": { + "External id": 465540,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453697798.173, "dur": 7.004, + "args": { + "External id": 465541,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453697799.818, "dur": 4.934, + "args": { + "External id": 465542,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453697800.841, "dur": 3.775, + "args": { + "External id": 465543,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453697815.843, "dur": 9.701, + "args": { + "External id": 465544,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453697818.754, "dur": 6.378, + "args": { + "External id": 465545,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453697835.577, "dur": 3.997, + "args": { + "External id": 465546,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453697843.983, "dur": 3.241, + "args": { + "External id": 465547,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453698013.926, "dur": 4.357, + "args": { + "External id": 465548,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453698015.525, "dur": 2.490, + "args": { + "External id": 465549,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453698021.779, "dur": 2.606, + "args": { + "External id": 465550,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453698023.072, "dur": 1.179, + "args": { + "External id": 465551,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453698114.380, "dur": 136.710, + "args": { + "External id": 465552,"Sequence number": 5036673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453698115.828, "dur": 11.440, + "args": { + "External id": 465553,"Sequence number": 5036673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18645 + } + }, + { + "ph": "s", "id": 370, "pid": 2338707, "tid": 2338707, "ts": 6267453698115.828, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453698119.862, "dur": 5.448, + "args": { + "External id": 465554,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453698122.660, "dur": 2.159, + "args": { + "External id": 465555,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453698128.730, "dur": 121.894, + "args": { + "External id": 465556,"Sequence number": 5036674, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453698131.013, "dur": 7.000, + "args": { + "External id": 465557,"Sequence number": 5036674, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453698134.558, "dur": 3.318, + "args": { + "External id": 465558,"Sequence number": 5036674, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18650 + } + }, + { + "ph": "s", "id": 369, "pid": 2338707, "tid": 2338707, "ts": 6267453698134.558, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453698138.928, "dur": 101.332, + "args": { + "External id": 465559,"Sequence number": 5036675, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18651 + } + }, + { + "ph": "s", "id": 368, "pid": 2338707, "tid": 2338707, "ts": 6267453698138.928, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453698243.362, "dur": 6.298, + "args": { + "External id": 465560,"Sequence number": 5036676, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18652 + } + }, + { + "ph": "s", "id": 367, "pid": 2338707, "tid": 2338707, "ts": 6267453698243.362, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453698262.333, "dur": 84.328, + "args": { + "External id": 465561,"Sequence number": 5036677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453698263.217, "dur": 8.318, + "args": { + "External id": 465562,"Sequence number": 5036677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18654 + } + }, + { + "ph": "s", "id": 366, "pid": 2338707, "tid": 2338707, "ts": 6267453698263.217, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453698266.919, "dur": 3.186, + "args": { + "External id": 465563,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453698269.078, "dur": 0.790, + "args": { + "External id": 465564,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453698272.129, "dur": 74.220, + "args": { + "External id": 465565,"Sequence number": 5036678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453698273.843, "dur": 8.191, + "args": { + "External id": 465566,"Sequence number": 5036678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453698277.639, "dur": 4.248, + "args": { + "External id": 465567,"Sequence number": 5036678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18659 + } + }, + { + "ph": "s", "id": 365, "pid": 2338707, "tid": 2338707, "ts": 6267453698277.639, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453698282.937, "dur": 58.231, + "args": { + "External id": 465568,"Sequence number": 5036679, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18660 + } + }, + { + "ph": "s", "id": 364, "pid": 2338707, "tid": 2338707, "ts": 6267453698282.937, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453698343.540, "dur": 2.348, + "args": { + "External id": 465569,"Sequence number": 5036680, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18661 + } + }, + { + "ph": "s", "id": 363, "pid": 2338707, "tid": 2338707, "ts": 6267453698343.540, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453698376.744, "dur": 191.269, + "args": { + "External id": 465570,"Sequence number": 5036681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18662 + } + }, + { + "ph": "s", "id": 362, "pid": 2338707, "tid": 2338707, "ts": 6267453698376.744, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453698430.911, "dur": 5.423, + "args": { + "External id": 465571,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453698474.319, "dur": 76.503, + "args": { + "External id": 465572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453698475.049, "dur": 8.535, + "args": { + "External id": 465573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453698479.338, "dur": 3.209, + "args": { + "External id": 465574,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453698481.427, "dur": 0.881, + "args": { + "External id": 465575,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453698484.746, "dur": 65.691, + "args": { + "External id": 465576,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453698486.047, "dur": 4.636, + "args": { + "External id": 465577,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453698489.245, "dur": 1.308, + "args": { + "External id": 465578,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453698491.523, "dur": 54.531, + "args": { + "External id": 465579,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453698548.210, "dur": 1.442, + "args": { + "External id": 465580,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267453698580.839, "dur": 48.459, + "args": { + "External id": 465581,"Sequence number": 5036682, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18673 + } + }, + { + "ph": "s", "id": 361, "pid": 2338707, "tid": 2338707, "ts": 6267453698580.839, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453698678.122, "dur": 232.344, + "args": { + "External id": 465582,"Sequence number": 5036683, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18674 + } + }, + { + "ph": "s", "id": 360, "pid": 2338707, "tid": 2338707, "ts": 6267453698678.122, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453698701.623, "dur": 6.788, + "args": { + "External id": 465583,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453698705.001, "dur": 2.999, + "args": { + "External id": 465584,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453698719.928, "dur": 9.185, + "args": { + "External id": 465585,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453698723.670, "dur": 4.964, + "args": { + "External id": 465586,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453698736.152, "dur": 4.039, + "args": { + "External id": 465587,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453698892.328, "dur": 3.647, + "args": { + "External id": 465588,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453698893.868, "dur": 1.848, + "args": { + "External id": 465589,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453698932.294, "dur": 115.403, + "args": { + "External id": 465590,"Sequence number": 5036684, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453698933.675, "dur": 12.211, + "args": { + "External id": 465591,"Sequence number": 5036684, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18683 + } + }, + { + "ph": "s", "id": 359, "pid": 2338707, "tid": 2338707, "ts": 6267453698933.675, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453698939.948, "dur": 4.611, + "args": { + "External id": 465592,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453698942.270, "dur": 1.850, + "args": { + "External id": 465593,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453698947.517, "dur": 99.770, + "args": { + "External id": 465594,"Sequence number": 5036685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453698949.246, "dur": 9.281, + "args": { + "External id": 465595,"Sequence number": 5036685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453698950.462, "dur": 7.913, + "args": { + "External id": 465596,"Sequence number": 5036685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18688 + } + }, + { + "ph": "s", "id": 358, "pid": 2338707, "tid": 2338707, "ts": 6267453698950.462, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453698959.496, "dur": 75.306, + "args": { + "External id": 465597,"Sequence number": 5036686, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18689 + } + }, + { + "ph": "s", "id": 357, "pid": 2338707, "tid": 2338707, "ts": 6267453698959.496, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453699037.808, "dur": 8.720, + "args": { + "External id": 465598,"Sequence number": 5036687, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18690 + } + }, + { + "ph": "s", "id": 356, "pid": 2338707, "tid": 2338707, "ts": 6267453699037.808, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453699100.759, "dur": 97.450, + "args": { + "External id": 465599,"Sequence number": 5036688, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453699102.087, "dur": 13.743, + "args": { + "External id": 465600,"Sequence number": 5036688, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18692 + } + }, + { + "ph": "s", "id": 355, "pid": 2338707, "tid": 2338707, "ts": 6267453699102.087, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453699107.875, "dur": 6.167, + "args": { + "External id": 465601,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453699112.459, "dur": 1.250, + "args": { + "External id": 465602,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453699116.869, "dur": 80.978, + "args": { + "External id": 465603,"Sequence number": 5036689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453699118.349, "dur": 4.471, + "args": { + "External id": 465604,"Sequence number": 5036689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453699119.272, "dur": 3.401, + "args": { + "External id": 465605,"Sequence number": 5036689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18697 + } + }, + { + "ph": "s", "id": 354, "pid": 2338707, "tid": 2338707, "ts": 6267453699119.272, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453699126.298, "dur": 64.595, + "args": { + "External id": 465606,"Sequence number": 5036690, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18698 + } + }, + { + "ph": "s", "id": 353, "pid": 2338707, "tid": 2338707, "ts": 6267453699126.298, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453699193.008, "dur": 3.866, + "args": { + "External id": 465607,"Sequence number": 5036691, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18699 + } + }, + { + "ph": "s", "id": 352, "pid": 2338707, "tid": 2338707, "ts": 6267453699193.008, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453699208.690, "dur": 69.159, + "args": { + "External id": 465608,"Sequence number": 5036692, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453699209.577, "dur": 6.641, + "args": { + "External id": 465609,"Sequence number": 5036692, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18701 + } + }, + { + "ph": "s", "id": 351, "pid": 2338707, "tid": 2338707, "ts": 6267453699209.577, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453699211.478, "dur": 3.311, + "args": { + "External id": 465610,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453699213.705, "dur": 0.945, + "args": { + "External id": 465611,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453699219.573, "dur": 57.960, + "args": { + "External id": 465612,"Sequence number": 5036693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453699220.419, "dur": 8.632, + "args": { + "External id": 465613,"Sequence number": 5036693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453699221.446, "dur": 7.415, + "args": { + "External id": 465614,"Sequence number": 5036693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18706 + } + }, + { + "ph": "s", "id": 350, "pid": 2338707, "tid": 2338707, "ts": 6267453699221.446, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453699229.697, "dur": 40.725, + "args": { + "External id": 465615,"Sequence number": 5036694, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18707 + } + }, + { + "ph": "s", "id": 349, "pid": 2338707, "tid": 2338707, "ts": 6267453699229.697, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453699272.709, "dur": 4.354, + "args": { + "External id": 465616,"Sequence number": 5036695, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18708 + } + }, + { + "ph": "s", "id": 348, "pid": 2338707, "tid": 2338707, "ts": 6267453699272.709, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453699299.967, "dur": 6.540, + "args": { + "External id": 465617,"Sequence number": 5036696, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453699301.558, "dur": 4.791, + "args": { + "External id": 465618,"Sequence number": 5036696, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18710 + } + }, + { + "ph": "s", "id": 347, "pid": 2338707, "tid": 2338707, "ts": 6267453699301.558, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453699315.347, "dur": 3.085, + "args": { + "External id": 465619,"Sequence number": 5036697, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453699316.396, "dur": 1.901, + "args": { + "External id": 465620,"Sequence number": 5036697, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18712 + } + }, + { + "ph": "s", "id": 346, "pid": 2338707, "tid": 2338707, "ts": 6267453699316.396, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453699325.669, "dur": 3.159, + "args": { + "External id": 465621,"Sequence number": 5036698, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453699326.945, "dur": 1.751, + "args": { + "External id": 465622,"Sequence number": 5036698, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18714 + } + }, + { + "ph": "s", "id": 345, "pid": 2338707, "tid": 2338707, "ts": 6267453699326.945, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453699366.153, "dur": 192.488, + "args": { + "External id": 465623,"Sequence number": 5036699, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18715 + } + }, + { + "ph": "s", "id": 344, "pid": 2338707, "tid": 2338707, "ts": 6267453699366.153, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453699391.090, "dur": 15.685, + "args": { + "External id": 465624,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453699399.797, "dur": 6.502, + "args": { + "External id": 465625,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453699573.805, "dur": 155.562, + "args": { + "External id": 465626,"Sequence number": 5036700, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18718 + } + }, + { + "ph": "s", "id": 343, "pid": 2338707, "tid": 2338707, "ts": 6267453699573.805, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453699590.850, "dur": 27.509, + "args": { + "External id": 465627,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453699610.871, "dur": 6.691, + "args": { + "External id": 465628,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338707, "tid": 2338707, + "ts": 6267453699763.156, "dur": 206.725, + "args": { + "External id": 465629,"Sequence number": 5036701, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18721 + } + }, + { + "ph": "s", "id": 342, "pid": 2338707, "tid": 2338707, "ts": 6267453699763.156, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453699801.023, "dur": 141.079, + "args": { + "External id": 465630,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453699855.929, "dur": 10.174, + "args": { + "External id": 465631,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453699861.639, "dur": 3.868, + "args": { + "External id": 465632,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453699869.132, "dur": 4.047, + "args": { + "External id": 465633,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453699874.524, "dur": 1.445, + "args": { + "External id": 465634,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453699878.995, "dur": 3.960, + "args": { + "External id": 465635,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2338707, + "ts": 6267453699954.709, "dur": 5.892, + "args": { + "External id": 465636,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453699975.971, "dur": 6.302, + "args": { + "External id": 465637,"Sequence number": 5036702, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453699977.830, "dur": 4.304, + "args": { + "External id": 465638,"Sequence number": 5036702, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18730 + } + }, + { + "ph": "s", "id": 341, "pid": 2338707, "tid": 2338707, "ts": 6267453699977.830, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453699996.069, "dur": 177.145, + "args": { + "External id": 465639,"Sequence number": 5036703, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453699999.326, "dur": 9.752, + "args": { + "External id": 465640,"Sequence number": 5036703, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18732 + } + }, + { + "ph": "s", "id": 340, "pid": 2338707, "tid": 2338707, "ts": 6267453699999.326, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453700002.505, "dur": 5.173, + "args": { + "External id": 465641,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453700005.531, "dur": 1.872, + "args": { + "External id": 465642,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453700010.629, "dur": 162.143, + "args": { + "External id": 465643,"Sequence number": 5036704, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453700012.625, "dur": 6.168, + "args": { + "External id": 465644,"Sequence number": 5036704, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453700015.706, "dur": 2.948, + "args": { + "External id": 465645,"Sequence number": 5036704, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18737 + } + }, + { + "ph": "s", "id": 339, "pid": 2338707, "tid": 2338707, "ts": 6267453700015.706, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453700020.016, "dur": 139.914, + "args": { + "External id": 465646,"Sequence number": 5036705, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18738 + } + }, + { + "ph": "s", "id": 338, "pid": 2338707, "tid": 2338707, "ts": 6267453700020.016, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453700165.088, "dur": 6.710, + "args": { + "External id": 465647,"Sequence number": 5036706, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18739 + } + }, + { + "ph": "s", "id": 337, "pid": 2338707, "tid": 2338707, "ts": 6267453700165.088, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453700217.734, "dur": 272.128, + "args": { + "External id": 465648,"Sequence number": 5036707, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18740 + } + }, + { + "ph": "s", "id": 336, "pid": 2338707, "tid": 2338707, "ts": 6267453700217.734, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453700245.935, "dur": 5.853, + "args": { + "External id": 465649,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453700249.720, "dur": 1.909, + "args": { + "External id": 465650,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453700256.625, "dur": 3.854, + "args": { + "External id": 465651,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453700257.772, "dur": 2.565, + "args": { + "External id": 465652,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453700258.980, "dur": 1.233, + "args": { + "External id": 465653,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453700269.977, "dur": 11.892, + "args": { + "External id": 465654,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453700274.799, "dur": 6.726, + "args": { + "External id": 465655,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453700289.122, "dur": 3.407, + "args": { + "External id": 465656,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453700296.864, "dur": 4.736, + "args": { + "External id": 465657,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453700461.960, "dur": 4.384, + "args": { + "External id": 465658,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453700463.393, "dur": 2.637, + "args": { + "External id": 465659,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453700469.359, "dur": 5.171, + "args": { + "External id": 465660,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453700473.189, "dur": 1.239, + "args": { + "External id": 465661,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453700514.552, "dur": 147.875, + "args": { + "External id": 465662,"Sequence number": 5036708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453700515.944, "dur": 9.342, + "args": { + "External id": 465663,"Sequence number": 5036708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18755 + } + }, + { + "ph": "s", "id": 335, "pid": 2338707, "tid": 2338707, "ts": 6267453700515.944, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453700519.171, "dur": 4.860, + "args": { + "External id": 465664,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453700521.931, "dur": 1.788, + "args": { + "External id": 465665,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453700526.652, "dur": 135.427, + "args": { + "External id": 465666,"Sequence number": 5036709, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453700531.210, "dur": 6.552, + "args": { + "External id": 465667,"Sequence number": 5036709, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453700532.052, "dur": 5.561, + "args": { + "External id": 465668,"Sequence number": 5036709, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18760 + } + }, + { + "ph": "s", "id": 334, "pid": 2338707, "tid": 2338707, "ts": 6267453700532.052, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453700538.805, "dur": 111.184, + "args": { + "External id": 465669,"Sequence number": 5036710, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18761 + } + }, + { + "ph": "s", "id": 333, "pid": 2338707, "tid": 2338707, "ts": 6267453700538.805, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453700654.602, "dur": 6.569, + "args": { + "External id": 465670,"Sequence number": 5036711, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18762 + } + }, + { + "ph": "s", "id": 332, "pid": 2338707, "tid": 2338707, "ts": 6267453700654.602, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453700677.673, "dur": 87.948, + "args": { + "External id": 465671,"Sequence number": 5036712, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453700678.492, "dur": 7.362, + "args": { + "External id": 465672,"Sequence number": 5036712, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18764 + } + }, + { + "ph": "s", "id": 331, "pid": 2338707, "tid": 2338707, "ts": 6267453700678.492, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453700680.747, "dur": 3.582, + "args": { + "External id": 465673,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453700683.227, "dur": 0.920, + "args": { + "External id": 465674,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453700686.798, "dur": 78.545, + "args": { + "External id": 465675,"Sequence number": 5036713, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453700690.366, "dur": 8.103, + "args": { + "External id": 465676,"Sequence number": 5036713, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453700691.386, "dur": 6.898, + "args": { + "External id": 465677,"Sequence number": 5036713, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18769 + } + }, + { + "ph": "s", "id": 330, "pid": 2338707, "tid": 2338707, "ts": 6267453700691.386, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453700699.459, "dur": 60.832, + "args": { + "External id": 465678,"Sequence number": 5036714, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18770 + } + }, + { + "ph": "s", "id": 329, "pid": 2338707, "tid": 2338707, "ts": 6267453700699.459, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453700762.897, "dur": 2.042, + "args": { + "External id": 465679,"Sequence number": 5036715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18771 + } + }, + { + "ph": "s", "id": 328, "pid": 2338707, "tid": 2338707, "ts": 6267453700762.897, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453700794.883, "dur": 178.594, + "args": { + "External id": 465680,"Sequence number": 5036716, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18772 + } + }, + { + "ph": "s", "id": 327, "pid": 2338707, "tid": 2338707, "ts": 6267453700794.883, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453700838.760, "dur": 4.860, + "args": { + "External id": 465681,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453700886.384, "dur": 71.030, + "args": { + "External id": 465682,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453700887.384, "dur": 6.033, + "args": { + "External id": 465683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453700888.776, "dur": 3.732, + "args": { + "External id": 465684,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453700891.273, "dur": 0.921, + "args": { + "External id": 465685,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453700894.565, "dur": 62.469, + "args": { + "External id": 465686,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453700898.420, "dur": 2.733, + "args": { + "External id": 465687,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453700899.536, "dur": 1.498, + "args": { + "External id": 465688,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453700901.894, "dur": 51.073, + "args": { + "External id": 465689,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453700955.227, "dur": 1.171, + "args": { + "External id": 465690,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267453700984.110, "dur": 28.891, + "args": { + "External id": 465691,"Sequence number": 5036717, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18783 + } + }, + { + "ph": "s", "id": 326, "pid": 2338707, "tid": 2338707, "ts": 6267453700984.110, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453701055.188, "dur": 277.596, + "args": { + "External id": 465692,"Sequence number": 5036718, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18784 + } + }, + { + "ph": "s", "id": 325, "pid": 2338707, "tid": 2338707, "ts": 6267453701055.188, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453701124.313, "dur": 6.087, + "args": { + "External id": 465693,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453701126.193, "dur": 3.845, + "args": { + "External id": 465694,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453701140.611, "dur": 9.331, + "args": { + "External id": 465695,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453701143.875, "dur": 5.597, + "args": { + "External id": 465696,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453701156.884, "dur": 6.339, + "args": { + "External id": 465697,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453701314.517, "dur": 3.730, + "args": { + "External id": 465698,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453701315.982, "dur": 2.072, + "args": { + "External id": 465699,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453701356.054, "dur": 113.011, + "args": { + "External id": 465700,"Sequence number": 5036719, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453701360.228, "dur": 8.482, + "args": { + "External id": 465701,"Sequence number": 5036719, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18793 + } + }, + { + "ph": "s", "id": 324, "pid": 2338707, "tid": 2338707, "ts": 6267453701360.228, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453701363.432, "dur": 3.762, + "args": { + "External id": 465702,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453701365.552, "dur": 1.428, + "args": { + "External id": 465703,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453701370.072, "dur": 98.687, + "args": { + "External id": 465704,"Sequence number": 5036720, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453701371.968, "dur": 7.320, + "args": { + "External id": 465705,"Sequence number": 5036720, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453701375.669, "dur": 3.385, + "args": { + "External id": 465706,"Sequence number": 5036720, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18798 + } + }, + { + "ph": "s", "id": 323, "pid": 2338707, "tid": 2338707, "ts": 6267453701375.669, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453701380.274, "dur": 79.829, + "args": { + "External id": 465707,"Sequence number": 5036721, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18799 + } + }, + { + "ph": "s", "id": 322, "pid": 2338707, "tid": 2338707, "ts": 6267453701380.274, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453701462.868, "dur": 4.969, + "args": { + "External id": 465708,"Sequence number": 5036722, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18800 + } + }, + { + "ph": "s", "id": 321, "pid": 2338707, "tid": 2338707, "ts": 6267453701462.868, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453701484.087, "dur": 77.732, + "args": { + "External id": 465709,"Sequence number": 5036723, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453701484.721, "dur": 7.350, + "args": { + "External id": 465710,"Sequence number": 5036723, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18802 + } + }, + { + "ph": "s", "id": 320, "pid": 2338707, "tid": 2338707, "ts": 6267453701484.721, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453701488.055, "dur": 2.610, + "args": { + "External id": 465711,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453701489.569, "dur": 0.940, + "args": { + "External id": 465712,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453701492.779, "dur": 68.640, + "args": { + "External id": 465713,"Sequence number": 5036724, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453701493.897, "dur": 8.277, + "args": { + "External id": 465714,"Sequence number": 5036724, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453701496.722, "dur": 5.277, + "args": { + "External id": 465715,"Sequence number": 5036724, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18807 + } + }, + { + "ph": "s", "id": 319, "pid": 2338707, "tid": 2338707, "ts": 6267453701496.722, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453701503.059, "dur": 50.760, + "args": { + "External id": 465716,"Sequence number": 5036725, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18808 + } + }, + { + "ph": "s", "id": 318, "pid": 2338707, "tid": 2338707, "ts": 6267453701503.059, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453701556.159, "dur": 4.763, + "args": { + "External id": 465717,"Sequence number": 5036726, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18809 + } + }, + { + "ph": "s", "id": 317, "pid": 2338707, "tid": 2338707, "ts": 6267453701556.159, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453701571.116, "dur": 99.864, + "args": { + "External id": 465718,"Sequence number": 5036727, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453701571.844, "dur": 10.036, + "args": { + "External id": 465719,"Sequence number": 5036727, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18811 + } + }, + { + "ph": "s", "id": 316, "pid": 2338707, "tid": 2338707, "ts": 6267453701571.844, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453701573.460, "dur": 7.315, + "args": { + "External id": 465720,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453701579.784, "dur": 0.806, + "args": { + "External id": 465721,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453701583.002, "dur": 87.621, + "args": { + "External id": 465722,"Sequence number": 5036728, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453701584.103, "dur": 4.810, + "args": { + "External id": 465723,"Sequence number": 5036728, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453701585.039, "dur": 3.731, + "args": { + "External id": 465724,"Sequence number": 5036728, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18816 + } + }, + { + "ph": "s", "id": 315, "pid": 2338707, "tid": 2338707, "ts": 6267453701585.039, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453701609.842, "dur": 52.703, + "args": { + "External id": 465725,"Sequence number": 5036729, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18817 + } + }, + { + "ph": "s", "id": 314, "pid": 2338707, "tid": 2338707, "ts": 6267453701609.842, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453701665.585, "dur": 4.555, + "args": { + "External id": 465726,"Sequence number": 5036730, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18818 + } + }, + { + "ph": "s", "id": 313, "pid": 2338707, "tid": 2338707, "ts": 6267453701665.585, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453701692.469, "dur": 5.307, + "args": { + "External id": 465727,"Sequence number": 5036731, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453701694.067, "dur": 3.580, + "args": { + "External id": 465728,"Sequence number": 5036731, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18820 + } + }, + { + "ph": "s", "id": 312, "pid": 2338707, "tid": 2338707, "ts": 6267453701694.067, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453701705.862, "dur": 5.617, + "args": { + "External id": 465729,"Sequence number": 5036732, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453701709.346, "dur": 2.011, + "args": { + "External id": 465730,"Sequence number": 5036732, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18822 + } + }, + { + "ph": "s", "id": 311, "pid": 2338707, "tid": 2338707, "ts": 6267453701709.346, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453701716.310, "dur": 3.396, + "args": { + "External id": 465731,"Sequence number": 5036733, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453701717.443, "dur": 2.137, + "args": { + "External id": 465732,"Sequence number": 5036733, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18824 + } + }, + { + "ph": "s", "id": 310, "pid": 2338707, "tid": 2338707, "ts": 6267453701717.443, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453701756.636, "dur": 186.798, + "args": { + "External id": 465733,"Sequence number": 5036734, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18825 + } + }, + { + "ph": "s", "id": 309, "pid": 2338707, "tid": 2338707, "ts": 6267453701756.636, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453701779.388, "dur": 14.064, + "args": { + "External id": 465734,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453701782.764, "dur": 9.960, + "args": { + "External id": 465735,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453701960.379, "dur": 175.985, + "args": { + "External id": 465736,"Sequence number": 5036735, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18828 + } + }, + { + "ph": "s", "id": 308, "pid": 2338707, "tid": 2338707, "ts": 6267453701960.379, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453701977.072, "dur": 7.894, + "args": { + "External id": 465737,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453701979.677, "dur": 4.861, + "args": { + "External id": 465738,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338707, "tid": 2338707, + "ts": 6267453702174.380, "dur": 225.238, + "args": { + "External id": 465739,"Sequence number": 5036736, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18831 + } + }, + { + "ph": "s", "id": 307, "pid": 2338707, "tid": 2338707, "ts": 6267453702174.380, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453702217.314, "dur": 152.551, + "args": { + "External id": 465740,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453702274.759, "dur": 9.925, + "args": { + "External id": 465741,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453702278.249, "dur": 5.856, + "args": { + "External id": 465742,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453702287.500, "dur": 4.448, + "args": { + "External id": 465743,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453702293.694, "dur": 4.104, + "args": { + "External id": 465744,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453702301.417, "dur": 6.357, + "args": { + "External id": 465745,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2338707, + "ts": 6267453702383.264, "dur": 6.067, + "args": { + "External id": 465746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453702405.714, "dur": 9.087, + "args": { + "External id": 465747,"Sequence number": 5036737, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453702407.517, "dur": 7.137, + "args": { + "External id": 465748,"Sequence number": 5036737, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18840 + } + }, + { + "ph": "s", "id": 306, "pid": 2338707, "tid": 2338707, "ts": 6267453702407.517, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453702428.772, "dur": 130.660, + "args": { + "External id": 465749,"Sequence number": 5036738, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453702430.540, "dur": 13.269, + "args": { + "External id": 465750,"Sequence number": 5036738, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18842 + } + }, + { + "ph": "s", "id": 305, "pid": 2338707, "tid": 2338707, "ts": 6267453702430.540, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453702437.338, "dur": 5.221, + "args": { + "External id": 465751,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453702440.379, "dur": 1.859, + "args": { + "External id": 465752,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453702445.370, "dur": 113.585, + "args": { + "External id": 465753,"Sequence number": 5036739, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453702450.204, "dur": 3.508, + "args": { + "External id": 465754,"Sequence number": 5036739, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453702451.260, "dur": 2.293, + "args": { + "External id": 465755,"Sequence number": 5036739, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18847 + } + }, + { + "ph": "s", "id": 304, "pid": 2338707, "tid": 2338707, "ts": 6267453702451.260, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453702455.196, "dur": 95.903, + "args": { + "External id": 465756,"Sequence number": 5036740, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18848 + } + }, + { + "ph": "s", "id": 303, "pid": 2338707, "tid": 2338707, "ts": 6267453702455.196, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453702554.059, "dur": 4.128, + "args": { + "External id": 465757,"Sequence number": 5036741, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18849 + } + }, + { + "ph": "s", "id": 302, "pid": 2338707, "tid": 2338707, "ts": 6267453702554.059, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453702618.619, "dur": 260.780, + "args": { + "External id": 465758,"Sequence number": 5036742, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18850 + } + }, + { + "ph": "s", "id": 301, "pid": 2338707, "tid": 2338707, "ts": 6267453702618.619, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453702644.005, "dur": 4.470, + "args": { + "External id": 465759,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453702645.749, "dur": 2.430, + "args": { + "External id": 465760,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453702653.408, "dur": 9.057, + "args": { + "External id": 465761,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453702657.820, "dur": 4.508, + "args": { + "External id": 465762,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453702661.336, "dur": 0.874, + "args": { + "External id": 465763,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453702673.420, "dur": 9.423, + "args": { + "External id": 465764,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453702676.012, "dur": 6.514, + "args": { + "External id": 465765,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453702689.803, "dur": 5.902, + "args": { + "External id": 465766,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453702699.899, "dur": 6.399, + "args": { + "External id": 465767,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453702855.748, "dur": 4.118, + "args": { + "External id": 465768,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453702857.287, "dur": 2.319, + "args": { + "External id": 465769,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453702863.222, "dur": 2.014, + "args": { + "External id": 465770,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453702864.410, "dur": 0.700, + "args": { + "External id": 465771,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453702901.947, "dur": 117.745, + "args": { + "External id": 465772,"Sequence number": 5036743, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453702903.370, "dur": 13.114, + "args": { + "External id": 465773,"Sequence number": 5036743, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18865 + } + }, + { + "ph": "s", "id": 300, "pid": 2338707, "tid": 2338707, "ts": 6267453702903.370, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453702906.173, "dur": 8.733, + "args": { + "External id": 465774,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453702910.786, "dur": 3.771, + "args": { + "External id": 465775,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453702917.870, "dur": 101.332, + "args": { + "External id": 465776,"Sequence number": 5036744, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453702919.793, "dur": 3.827, + "args": { + "External id": 465777,"Sequence number": 5036744, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453702920.956, "dur": 2.449, + "args": { + "External id": 465778,"Sequence number": 5036744, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18870 + } + }, + { + "ph": "s", "id": 299, "pid": 2338707, "tid": 2338707, "ts": 6267453702920.956, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453702924.503, "dur": 84.947, + "args": { + "External id": 465779,"Sequence number": 5036745, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18871 + } + }, + { + "ph": "s", "id": 298, "pid": 2338707, "tid": 2338707, "ts": 6267453702924.503, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703012.680, "dur": 5.732, + "args": { + "External id": 465780,"Sequence number": 5036746, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18872 + } + }, + { + "ph": "s", "id": 297, "pid": 2338707, "tid": 2338707, "ts": 6267453703012.680, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453703029.253, "dur": 141.871, + "args": { + "External id": 465781,"Sequence number": 5036747, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453703030.287, "dur": 6.134, + "args": { + "External id": 465782,"Sequence number": 5036747, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18874 + } + }, + { + "ph": "s", "id": 296, "pid": 2338707, "tid": 2338707, "ts": 6267453703030.287, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453703032.145, "dur": 3.066, + "args": { + "External id": 465783,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453703034.230, "dur": 0.800, + "args": { + "External id": 465784,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453703039.921, "dur": 130.933, + "args": { + "External id": 465785,"Sequence number": 5036748, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453703041.146, "dur": 5.761, + "args": { + "External id": 465786,"Sequence number": 5036748, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703042.353, "dur": 4.379, + "args": { + "External id": 465787,"Sequence number": 5036748, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18879 + } + }, + { + "ph": "s", "id": 295, "pid": 2338707, "tid": 2338707, "ts": 6267453703042.353, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453703047.842, "dur": 107.148, + "args": { + "External id": 465788,"Sequence number": 5036749, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18880 + } + }, + { + "ph": "s", "id": 294, "pid": 2338707, "tid": 2338707, "ts": 6267453703047.842, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703159.512, "dur": 10.402, + "args": { + "External id": 465789,"Sequence number": 5036750, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18881 + } + }, + { + "ph": "s", "id": 293, "pid": 2338707, "tid": 2338707, "ts": 6267453703159.512, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453703199.742, "dur": 175.838, + "args": { + "External id": 465790,"Sequence number": 5036751, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18882 + } + }, + { + "ph": "s", "id": 292, "pid": 2338707, "tid": 2338707, "ts": 6267453703199.742, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453703244.494, "dur": 6.575, + "args": { + "External id": 465791,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453703286.248, "dur": 74.667, + "args": { + "External id": 465792,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453703287.420, "dur": 11.115, + "args": { + "External id": 465793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453703289.168, "dur": 8.050, + "args": { + "External id": 465794,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453703293.967, "dur": 3.019, + "args": { + "External id": 465795,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453703300.158, "dur": 60.369, + "args": { + "External id": 465796,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453703301.708, "dur": 3.268, + "args": { + "External id": 465797,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703302.953, "dur": 1.894, + "args": { + "External id": 465798,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453703305.900, "dur": 50.254, + "args": { + "External id": 465799,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703358.614, "dur": 1.354, + "args": { + "External id": 465800,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267453703386.353, "dur": 27.095, + "args": { + "External id": 465801,"Sequence number": 5036752, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18893 + } + }, + { + "ph": "s", "id": 291, "pid": 2338707, "tid": 2338707, "ts": 6267453703386.353, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453703456.292, "dur": 240.118, + "args": { + "External id": 465802,"Sequence number": 5036753, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18894 + } + }, + { + "ph": "s", "id": 290, "pid": 2338707, "tid": 2338707, "ts": 6267453703456.292, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453703477.257, "dur": 6.066, + "args": { + "External id": 465803,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703478.662, "dur": 4.456, + "args": { + "External id": 465804,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453703492.873, "dur": 8.510, + "args": { + "External id": 465805,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453703496.360, "dur": 4.595, + "args": { + "External id": 465806,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453703508.471, "dur": 7.244, + "args": { + "External id": 465807,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453703677.696, "dur": 5.198, + "args": { + "External id": 465808,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703679.082, "dur": 3.300, + "args": { + "External id": 465809,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453703719.052, "dur": 109.520, + "args": { + "External id": 465810,"Sequence number": 5036754, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453703720.325, "dur": 8.536, + "args": { + "External id": 465811,"Sequence number": 5036754, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18903 + } + }, + { + "ph": "s", "id": 289, "pid": 2338707, "tid": 2338707, "ts": 6267453703720.325, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453703722.860, "dur": 4.435, + "args": { + "External id": 465812,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453703725.369, "dur": 1.724, + "args": { + "External id": 465813,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453703730.274, "dur": 98.028, + "args": { + "External id": 465814,"Sequence number": 5036755, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453703735.191, "dur": 6.650, + "args": { + "External id": 465815,"Sequence number": 5036755, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703736.204, "dur": 5.465, + "args": { + "External id": 465816,"Sequence number": 5036755, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18908 + } + }, + { + "ph": "s", "id": 288, "pid": 2338707, "tid": 2338707, "ts": 6267453703736.204, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453703742.946, "dur": 77.654, + "args": { + "External id": 465817,"Sequence number": 5036756, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18909 + } + }, + { + "ph": "s", "id": 287, "pid": 2338707, "tid": 2338707, "ts": 6267453703742.946, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703823.621, "dur": 3.942, + "args": { + "External id": 465818,"Sequence number": 5036757, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18910 + } + }, + { + "ph": "s", "id": 286, "pid": 2338707, "tid": 2338707, "ts": 6267453703823.621, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453703841.396, "dur": 75.240, + "args": { + "External id": 465819,"Sequence number": 5036758, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453703842.173, "dur": 9.147, + "args": { + "External id": 465820,"Sequence number": 5036758, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18912 + } + }, + { + "ph": "s", "id": 285, "pid": 2338707, "tid": 2338707, "ts": 6267453703842.173, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453703844.609, "dur": 5.396, + "args": { + "External id": 465821,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453703849.095, "dur": 0.700, + "args": { + "External id": 465822,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453703852.139, "dur": 64.121, + "args": { + "External id": 465823,"Sequence number": 5036759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453703855.636, "dur": 4.181, + "args": { + "External id": 465824,"Sequence number": 5036759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703857.045, "dur": 2.619, + "args": { + "External id": 465825,"Sequence number": 5036759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18917 + } + }, + { + "ph": "s", "id": 284, "pid": 2338707, "tid": 2338707, "ts": 6267453703857.045, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453703860.849, "dur": 44.315, + "args": { + "External id": 465826,"Sequence number": 5036760, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18918 + } + }, + { + "ph": "s", "id": 283, "pid": 2338707, "tid": 2338707, "ts": 6267453703860.849, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703907.352, "dur": 8.077, + "args": { + "External id": 465827,"Sequence number": 5036761, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18919 + } + }, + { + "ph": "s", "id": 282, "pid": 2338707, "tid": 2338707, "ts": 6267453703907.352, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453703924.732, "dur": 69.093, + "args": { + "External id": 465828,"Sequence number": 5036762, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453703925.450, "dur": 8.090, + "args": { + "External id": 465829,"Sequence number": 5036762, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18921 + } + }, + { + "ph": "s", "id": 281, "pid": 2338707, "tid": 2338707, "ts": 6267453703925.450, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453703929.675, "dur": 2.603, + "args": { + "External id": 465830,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453703931.306, "dur": 0.831, + "args": { + "External id": 465831,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453703934.575, "dur": 58.889, + "args": { + "External id": 465832,"Sequence number": 5036763, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453703935.502, "dur": 8.554, + "args": { + "External id": 465833,"Sequence number": 5036763, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703939.730, "dur": 4.163, + "args": { + "External id": 465834,"Sequence number": 5036763, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18926 + } + }, + { + "ph": "s", "id": 280, "pid": 2338707, "tid": 2338707, "ts": 6267453703939.730, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453703944.863, "dur": 43.639, + "args": { + "External id": 465835,"Sequence number": 5036764, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18927 + } + }, + { + "ph": "s", "id": 279, "pid": 2338707, "tid": 2338707, "ts": 6267453703944.863, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453703990.466, "dur": 2.674, + "args": { + "External id": 465836,"Sequence number": 5036765, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18928 + } + }, + { + "ph": "s", "id": 278, "pid": 2338707, "tid": 2338707, "ts": 6267453703990.466, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453704011.950, "dur": 6.709, + "args": { + "External id": 465837,"Sequence number": 5036766, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453704013.628, "dur": 4.881, + "args": { + "External id": 465838,"Sequence number": 5036766, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18930 + } + }, + { + "ph": "s", "id": 277, "pid": 2338707, "tid": 2338707, "ts": 6267453704013.628, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453704026.859, "dur": 6.450, + "args": { + "External id": 465839,"Sequence number": 5036767, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453704028.490, "dur": 4.627, + "args": { + "External id": 465840,"Sequence number": 5036767, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18932 + } + }, + { + "ph": "s", "id": 276, "pid": 2338707, "tid": 2338707, "ts": 6267453704028.490, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453704037.887, "dur": 3.910, + "args": { + "External id": 465841,"Sequence number": 5036768, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453704039.551, "dur": 2.112, + "args": { + "External id": 465842,"Sequence number": 5036768, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18934 + } + }, + { + "ph": "s", "id": 275, "pid": 2338707, "tid": 2338707, "ts": 6267453704039.551, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453704121.294, "dur": 199.723, + "args": { + "External id": 465843,"Sequence number": 5036769, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18935 + } + }, + { + "ph": "s", "id": 274, "pid": 2338707, "tid": 2338707, "ts": 6267453704121.294, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453704145.757, "dur": 12.343, + "args": { + "External id": 465844,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453704149.294, "dur": 7.998, + "args": { + "External id": 465845,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453704335.814, "dur": 124.292, + "args": { + "External id": 465846,"Sequence number": 5036770, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18938 + } + }, + { + "ph": "s", "id": 273, "pid": 2338707, "tid": 2338707, "ts": 6267453704335.814, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453704351.909, "dur": 10.048, + "args": { + "External id": 465847,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453704354.628, "dur": 6.955, + "args": { + "External id": 465848,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338707, "tid": 2338707, + "ts": 6267453704494.891, "dur": 244.400, + "args": { + "External id": 465849,"Sequence number": 5036771, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18941 + } + }, + { + "ph": "s", "id": 272, "pid": 2338707, "tid": 2338707, "ts": 6267453704494.891, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453704526.231, "dur": 169.023, + "args": { + "External id": 465850,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453704580.573, "dur": 7.757, + "args": { + "External id": 465851,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453704583.291, "dur": 4.351, + "args": { + "External id": 465852,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453704608.857, "dur": 6.848, + "args": { + "External id": 465853,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453704620.959, "dur": 1.222, + "args": { + "External id": 465854,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453704625.478, "dur": 5.713, + "args": { + "External id": 465855,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338707, "tid": 2338707, + "ts": 6267453704709.195, "dur": 5.945, + "args": { + "External id": 465856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453704745.527, "dur": 7.445, + "args": { + "External id": 465857,"Sequence number": 5036772, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453704747.429, "dur": 5.396, + "args": { + "External id": 465858,"Sequence number": 5036772, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18950 + } + }, + { + "ph": "s", "id": 271, "pid": 2338707, "tid": 2338707, "ts": 6267453704747.429, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453704767.895, "dur": 132.173, + "args": { + "External id": 465859,"Sequence number": 5036773, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453704769.242, "dur": 10.600, + "args": { + "External id": 465860,"Sequence number": 5036773, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18952 + } + }, + { + "ph": "s", "id": 270, "pid": 2338707, "tid": 2338707, "ts": 6267453704769.242, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453704772.434, "dur": 5.836, + "args": { + "External id": 465861,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453704775.843, "dur": 2.143, + "args": { + "External id": 465862,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453704784.077, "dur": 115.593, + "args": { + "External id": 465863,"Sequence number": 5036774, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453704786.725, "dur": 4.860, + "args": { + "External id": 465864,"Sequence number": 5036774, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453704787.666, "dur": 3.706, + "args": { + "External id": 465865,"Sequence number": 5036774, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18957 + } + }, + { + "ph": "s", "id": 269, "pid": 2338707, "tid": 2338707, "ts": 6267453704787.666, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453704792.792, "dur": 97.116, + "args": { + "External id": 465866,"Sequence number": 5036775, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18958 + } + }, + { + "ph": "s", "id": 268, "pid": 2338707, "tid": 2338707, "ts": 6267453704792.792, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453704893.115, "dur": 5.910, + "args": { + "External id": 465867,"Sequence number": 5036776, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18959 + } + }, + { + "ph": "s", "id": 267, "pid": 2338707, "tid": 2338707, "ts": 6267453704893.115, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453704946.245, "dur": 308.128, + "args": { + "External id": 465868,"Sequence number": 5036777, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18960 + } + }, + { + "ph": "s", "id": 266, "pid": 2338707, "tid": 2338707, "ts": 6267453704946.245, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453704967.482, "dur": 2.869, + "args": { + "External id": 465869,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453704968.509, "dur": 1.722, + "args": { + "External id": 465870,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338707, "tid": 2338707, + "ts": 6267453704974.709, "dur": 5.986, + "args": { + "External id": 465871,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453704978.368, "dur": 2.195, + "args": { + "External id": 465872,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453704979.242, "dur": 1.213, + "args": { + "External id": 465873,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453704989.451, "dur": 7.817, + "args": { + "External id": 465874,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453704991.549, "dur": 5.314, + "args": { + "External id": 465875,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453705004.332, "dur": 5.458, + "args": { + "External id": 465876,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453705016.431, "dur": 3.265, + "args": { + "External id": 465877,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453705224.227, "dur": 5.813, + "args": { + "External id": 465878,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453705225.731, "dur": 3.855, + "args": { + "External id": 465879,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453705233.791, "dur": 3.070, + "args": { + "External id": 465880,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453705235.306, "dur": 1.437, + "args": { + "External id": 465881,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453705279.991, "dur": 132.562, + "args": { + "External id": 465882,"Sequence number": 5036778, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453705281.069, "dur": 11.605, + "args": { + "External id": 465883,"Sequence number": 5036778, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18975 + } + }, + { + "ph": "s", "id": 265, "pid": 2338707, "tid": 2338707, "ts": 6267453705281.069, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453705284.251, "dur": 7.020, + "args": { + "External id": 465884,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453705288.960, "dur": 1.987, + "args": { + "External id": 465885,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453705294.380, "dur": 117.828, + "args": { + "External id": 465886,"Sequence number": 5036779, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453705296.364, "dur": 5.008, + "args": { + "External id": 465887,"Sequence number": 5036779, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453705297.515, "dur": 3.715, + "args": { + "External id": 465888,"Sequence number": 5036779, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18980 + } + }, + { + "ph": "s", "id": 264, "pid": 2338707, "tid": 2338707, "ts": 6267453705297.515, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453705304.799, "dur": 96.918, + "args": { + "External id": 465889,"Sequence number": 5036780, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18981 + } + }, + { + "ph": "s", "id": 263, "pid": 2338707, "tid": 2338707, "ts": 6267453705304.799, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453705405.020, "dur": 6.360, + "args": { + "External id": 465890,"Sequence number": 5036781, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18982 + } + }, + { + "ph": "s", "id": 262, "pid": 2338707, "tid": 2338707, "ts": 6267453705405.020, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453705424.024, "dur": 103.142, + "args": { + "External id": 465891,"Sequence number": 5036782, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453705424.952, "dur": 12.155, + "args": { + "External id": 465892,"Sequence number": 5036782, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18984 + } + }, + { + "ph": "s", "id": 261, "pid": 2338707, "tid": 2338707, "ts": 6267453705424.952, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453705430.563, "dur": 4.988, + "args": { + "External id": 465893,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453705434.548, "dur": 0.825, + "args": { + "External id": 465894,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453705437.819, "dur": 89.024, + "args": { + "External id": 465895,"Sequence number": 5036783, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453705439.180, "dur": 5.808, + "args": { + "External id": 465896,"Sequence number": 5036783, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453705440.370, "dur": 4.421, + "args": { + "External id": 465897,"Sequence number": 5036783, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18989 + } + }, + { + "ph": "s", "id": 260, "pid": 2338707, "tid": 2338707, "ts": 6267453705440.370, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453705445.882, "dur": 73.051, + "args": { + "External id": 465898,"Sequence number": 5036784, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18990 + } + }, + { + "ph": "s", "id": 259, "pid": 2338707, "tid": 2338707, "ts": 6267453705445.882, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453705521.100, "dur": 4.837, + "args": { + "External id": 465899,"Sequence number": 5036785, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18991 + } + }, + { + "ph": "s", "id": 258, "pid": 2338707, "tid": 2338707, "ts": 6267453705521.100, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453705552.540, "dur": 221.990, + "args": { + "External id": 465900,"Sequence number": 5036786, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18992 + } + }, + { + "ph": "s", "id": 257, "pid": 2338707, "tid": 2338707, "ts": 6267453705552.540, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453705619.497, "dur": 8.590, + "args": { + "External id": 465901,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453705674.091, "dur": 82.664, + "args": { + "External id": 465902,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453705675.266, "dur": 9.079, + "args": { + "External id": 465903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453705677.354, "dur": 5.662, + "args": { + "External id": 465904,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453705681.951, "dur": 0.881, + "args": { + "External id": 465905,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453705685.467, "dur": 70.795, + "args": { + "External id": 465906,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338707, "tid": 2338707, + "ts": 6267453705687.537, "dur": 2.855, + "args": { + "External id": 465907,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453705688.587, "dur": 1.457, + "args": { + "External id": 465908,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 19000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453705691.086, "dur": 60.392, + "args": { + "External id": 465909,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 19001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338707, "tid": 2338707, + "ts": 6267453705753.976, "dur": 1.523, + "args": { + "External id": 465910,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267453705785.991, "dur": 30.730, + "args": { + "External id": 465911,"Sequence number": 5036787, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 19003 + } + }, + { + "ph": "s", "id": 256, "pid": 2338707, "tid": 2338707, "ts": 6267453705785.991, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338707, "tid": 2338707, + "ts": 6267453705843.705, "dur": 51.168, + "args": { + "External id": 465912,"Sequence number": 5036788, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 19004 + } + }, + { + "ph": "s", "id": 255, "pid": 2338707, "tid": 2338707, "ts": 6267453705843.705, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338707, "tid": 2338707, + "ts": 6267453705853.728, "dur": 35.225, + "args": { + "External id": 465913,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 19005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453705890.854, "dur": 1.699, + "args": { + "External id": 465914,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 16384], []], "Ev Idx": 19006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338707, "tid": 2338707, + "ts": 6267453705938.121, "dur": 52.487, + "args": { + "External id": 465915,"Record function id": 0, "Ev Idx": 19007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2338707, "tid": 2338707, + "ts": 6267453705993.489, "dur": 290.526, + "args": { + "External id": 465916,"Record function id": 0, "Ev Idx": 19008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453706037.898, "dur": 235.191, + "args": { + "External id": 465917,"Sequence number": 5036789, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [67108864, 16384, 4096, 1]], "Input Dims": [[4096], [8, 4096, 4, 4096]], "Ev Idx": 19009 + } + }, + { + "ph": "s", "id": 254, "pid": 2338707, "tid": 2338707, "ts": 6267453706037.898, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338707, "tid": 2338707, + "ts": 6267453706174.698, "dur": 51.094, + "args": { + "External id": 465918,"kernel_hash": "cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/du/cdud5dksr72gsdwqcwxmxkrfjwvdpdc72uaztxrtc3aq46xq4xfs.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [131072, 4096], [4096], [131072], [], [], [], [], [], [], [], [], []], "Ev Idx": 19010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267453706394.483, "dur": 92.078, + "args": { + "External id": 465919,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453706399.208, "dur": 6.221, + "args": { + "External id": 465920,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706448.779, "dur": 37.346, + "args": { + "External id": 465921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706451.570, "dur": 33.641, + "args": { + "External id": 465922,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267453706492.681, "dur": 19.475, + "args": { + "External id": 465923,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453706493.586, "dur": 2.335, + "args": { + "External id": 465924,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706496.672, "dur": 15.111, + "args": { + "External id": 465925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706497.152, "dur": 13.997, + "args": { + "External id": 465926,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267453706515.843, "dur": 20.874, + "args": { + "External id": 465927,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453706519.315, "dur": 2.257, + "args": { + "External id": 465928,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706522.334, "dur": 14.093, + "args": { + "External id": 465929,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706522.893, "dur": 13.155, + "args": { + "External id": 465930,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453706548.262, "dur": 0.839, + "args": { + "External id": 465931,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 19023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338707, "tid": 2338707, + "ts": 6267453706560.277, "dur": 11.118, + "args": { + "External id": 465932,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 19024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706566.993, "dur": 2.260, + "args": { + "External id": 465933,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 19025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453706579.245, "dur": 7.566, + "args": { + "External id": 465934,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706583.793, "dur": 0.932, + "args": { + "External id": 465935,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453706588.294, "dur": 19.362, + "args": { + "External id": 465936,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706590.537, "dur": 0.539, + "args": { + "External id": 465937,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453706610.899, "dur": 4.272, + "args": { + "External id": 465938,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706613.542, "dur": 0.799, + "args": { + "External id": 465939,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453706622.377, "dur": 6.235, + "args": { + "External id": 465940,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 19032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706624.882, "dur": 3.099, + "args": { + "External id": 465941,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 19033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453706629.747, "dur": 3.890, + "args": { + "External id": 465942,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 19034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706632.243, "dur": 0.585, + "args": { + "External id": 465943,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 19035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453706634.961, "dur": 2.941, + "args": { + "External id": 465944,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4096, 4], [], [], [], []], "Ev Idx": 19036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706636.785, "dur": 0.496, + "args": { + "External id": 465945,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 19037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453706643.315, "dur": 5.940, + "args": { + "External id": 465946,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4096, 4], [], []], "Ev Idx": 19038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706647.724, "dur": 0.523, + "args": { + "External id": 465947,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 19039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453706654.391, "dur": 3.436, + "args": { + "External id": 465948,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706656.621, "dur": 0.554, + "args": { + "External id": 465949,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267453706661.110, "dur": 6.901, + "args": { + "External id": 465950,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706666.092, "dur": 0.684, + "args": { + "External id": 465951,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453706671.648, "dur": 2.623, + "args": { + "External id": 465952,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706673.281, "dur": 0.354, + "args": { + "External id": 465953,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453706676.990, "dur": 6.894, + "args": { + "External id": 465954,"Sequence number": 5036790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19046 + } + }, + { + "ph": "s", "id": 253, "pid": 2338707, "tid": 2338707, "ts": 6267453706676.990, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706681.053, "dur": 0.761, + "args": { + "External id": 465955,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453706684.964, "dur": 9.569, + "args": { + "External id": 465956,"Sequence number": 5036791, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19048 + } + }, + { + "ph": "s", "id": 252, "pid": 2338707, "tid": 2338707, "ts": 6267453706684.964, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706690.910, "dur": 2.791, + "args": { + "External id": 465957,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267453706695.805, "dur": 5.425, + "args": { + "External id": 465958,"Sequence number": 5036792, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19050 + } + }, + { + "ph": "s", "id": 251, "pid": 2338707, "tid": 2338707, "ts": 6267453706695.805, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706699.739, "dur": 0.534, + "args": { + "External id": 465959,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453706702.550, "dur": 7.030, + "args": { + "External id": 465960,"Sequence number": 5036793, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19052 + } + }, + { + "ph": "s", "id": 250, "pid": 2338707, "tid": 2338707, "ts": 6267453706702.550, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706708.190, "dur": 0.455, + "args": { + "External id": 465961,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267453706713.966, "dur": 51.010, + "args": { + "External id": 465962,"Sequence number": 5036794, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267453706715.605, "dur": 49.128, + "args": { + "External id": 465963,"Sequence number": 5036794, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453706718.494, "dur": 10.036, + "args": { + "External id": 465964,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453706723.083, "dur": 4.656, + "args": { + "External id": 465965,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706730.267, "dur": 33.938, + "args": { + "External id": 465966,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453706797.754, "dur": 4.454, + "args": { + "External id": 465967,"Sequence number": 5036794, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19059 + } + }, + { + "ph": "s", "id": 249, "pid": 2338707, "tid": 2338707, "ts": 6267453706797.754, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453706804.870, "dur": 1.206, + "args": { + "External id": 465968,"Sequence number": 5036795, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453706839.694, "dur": 132745.741, + "args": { + "External id": 465969,"Sequence number": 5036795, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19061 + } + }, + { + "ph": "s", "id": 248, "pid": 2338707, "tid": 2338707, "ts": 6267453706839.694, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267453706857.566, "dur": 37.007, + "args": { + "External id": 465970,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267453706860.794, "dur": 33.489, + "args": { + "External id": 465971,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453706862.534, "dur": 8.232, + "args": { + "External id": 465972,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453706866.318, "dur": 4.067, + "args": { + "External id": 465973,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706871.621, "dur": 22.014, + "args": { + "External id": 465974,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453706915.274, "dur": 27.750, + "args": { + "External id": 465975,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453706916.526, "dur": 6.666, + "args": { + "External id": 465976,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706919.084, "dur": 3.807, + "args": { + "External id": 465977,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706924.200, "dur": 18.598, + "args": { + "External id": 465978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706925.301, "dur": 17.103, + "args": { + "External id": 465979,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453706947.067, "dur": 26.789, + "args": { + "External id": 465980,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453706948.181, "dur": 8.246, + "args": { + "External id": 465981,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453706949.631, "dur": 6.566, + "args": { + "External id": 465982,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706959.457, "dur": 14.173, + "args": { + "External id": 465983,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706959.884, "dur": 13.396, + "args": { + "External id": 465984,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267453706979.114, "dur": 19.817, + "args": { + "External id": 465985,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453706980.337, "dur": 3.396, + "args": { + "External id": 465986,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706984.485, "dur": 14.066, + "args": { + "External id": 465987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453706985.111, "dur": 13.096, + "args": { + "External id": 465988,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2338707, + "ts": 6267453707006.098, "dur": 35.009, + "args": { + "External id": 465989,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453707045.296, "dur": 119.272, + "args": { + "External id": 465990,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453707050.699, "dur": 113.185, + "args": { + "External id": 465991,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453707097.158, "dur": 1.266, + "args": { + "External id": 465992,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453707104.205, "dur": 36.197, + "args": { + "External id": 465993,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453707106.175, "dur": 33.921, + "args": { + "External id": 465994,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453707109.434, "dur": 4.452, + "args": { + "External id": 465995,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453707114.969, "dur": 24.475, + "args": { + "External id": 465996,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2338707, + "ts": 6267453707170.412, "dur": 125200.241, + "args": { + "External id": 465997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2338707, + "ts": 6267453707172.378, "dur": 125195.677, + "args": { + "External id": 465998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453832392.628, "dur": 16.610, + "args": { + "External id": 465999,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453832403.896, "dur": 1.401, + "args": { + "External id": 466000,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453832417.223, "dur": 154.513, + "args": { + "External id": 466001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453832420.288, "dur": 9.565, + "args": { + "External id": 466002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453832423.755, "dur": 4.962, + "args": { + "External id": 466003,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453832426.347, "dur": 0.947, + "args": { + "External id": 466004,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453832431.623, "dur": 139.134, + "args": { + "External id": 466005,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453832433.658, "dur": 136.032, + "args": { + "External id": 466006,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453832580.599, "dur": 7.070, + "args": { + "External id": 466007,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453832584.683, "dur": 1.117, + "args": { + "External id": 466008,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453832618.114, "dur": 6.842, + "args": { + "External id": 466009,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453832642.323, "dur": 14.483, + "args": { + "External id": 466010,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453832646.554, "dur": 9.912, + "args": { + "External id": 466011,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453832857.086, "dur": 390.238, + "args": { + "External id": 466012,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453832863.923, "dur": 2.440, + "args": { + "External id": 466013,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453832870.973, "dur": 375.378, + "args": { + "External id": 466014,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453832874.827, "dur": 0.917, + "args": { + "External id": 466015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453832879.348, "dur": 48.815, + "args": { + "External id": 466016,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453832932.491, "dur": 6.420, + "args": { + "External id": 466017,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453832936.758, "dur": 1.460, + "args": { + "External id": 466018,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453832942.871, "dur": 53.496, + "args": { + "External id": 466019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453832945.857, "dur": 2.014, + "args": { + "External id": 466020,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453832951.619, "dur": 44.064, + "args": { + "External id": 466021,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453832964.514, "dur": 5.203, + "args": { + "External id": 466022,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453833000.336, "dur": 42.216, + "args": { + "External id": 466023,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453833047.201, "dur": 64.784, + "args": { + "External id": 466024,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453833118.404, "dur": 26.039, + "args": { + "External id": 466025,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453833148.620, "dur": 20.093, + "args": { + "External id": 466026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453833171.805, "dur": 30.046, + "args": { + "External id": 466027,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453833175.418, "dur": 2.392, + "args": { + "External id": 466028,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453833180.776, "dur": 0.995, + "args": { + "External id": 466029,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453833207.759, "dur": 20.955, + "args": { + "External id": 466030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453833230.603, "dur": 14.017, + "args": { + "External id": 466031,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453833261.600, "dur": 3.992, + "args": { + "External id": 466032,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453833278.059, "dur": 6.550, + "args": { + "External id": 466033,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453833282.582, "dur": 0.596, + "args": { + "External id": 466034,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453833409.657, "dur": 104.569, + "args": { + "External id": 466035,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453833521.674, "dur": 11.400, + "args": { + "External id": 466036,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453833528.946, "dur": 1.413, + "args": { + "External id": 466037,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453833535.346, "dur": 35.785, + "args": { + "External id": 466038,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453833579.013, "dur": 8.767, + "args": { + "External id": 466039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453833582.239, "dur": 4.507, + "args": { + "External id": 466040,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453833585.212, "dur": 1.167, + "args": { + "External id": 466041,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453833610.914, "dur": 78.492, + "args": { + "External id": 466042,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453833613.358, "dur": 74.879, + "args": { + "External id": 466043,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453833699.759, "dur": 27.277, + "args": { + "External id": 466044,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453833737.474, "dur": 7.666, + "args": { + "External id": 466045,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453833741.329, "dur": 2.036, + "args": { + "External id": 466046,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453833751.096, "dur": 63.149, + "args": { + "External id": 466047,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453833752.509, "dur": 9.567, + "args": { + "External id": 466048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453833753.786, "dur": 7.545, + "args": { + "External id": 466049,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453833758.029, "dur": 3.024, + "args": { + "External id": 466050,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453833763.210, "dur": 50.399, + "args": { + "External id": 466051,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453833764.079, "dur": 48.684, + "args": { + "External id": 466052,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453833820.467, "dur": 4.487, + "args": { + "External id": 466053,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453833822.823, "dur": 0.440, + "args": { + "External id": 466054,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453833832.941, "dur": 2.182, + "args": { + "External id": 466055,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453833846.600, "dur": 10.770, + "args": { + "External id": 466056,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453833851.773, "dur": 5.270, + "args": { + "External id": 466057,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453833987.813, "dur": 305.478, + "args": { + "External id": 466058,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453833992.019, "dur": 2.248, + "args": { + "External id": 466059,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453833996.581, "dur": 295.936, + "args": { + "External id": 466060,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453833998.582, "dur": 0.412, + "args": { + "External id": 466061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453834003.793, "dur": 29.186, + "args": { + "External id": 466062,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453834035.576, "dur": 3.693, + "args": { + "External id": 466063,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453834038.129, "dur": 0.812, + "args": { + "External id": 466064,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453834043.897, "dur": 97.675, + "args": { + "External id": 466065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453834045.434, "dur": 5.876, + "args": { + "External id": 466066,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453834111.759, "dur": 29.334, + "args": { + "External id": 466067,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453834117.821, "dur": 3.633, + "args": { + "External id": 466068,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453834144.057, "dur": 28.870, + "args": { + "External id": 466069,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453834175.348, "dur": 18.385, + "args": { + "External id": 466070,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453834197.356, "dur": 16.487, + "args": { + "External id": 466071,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453834215.961, "dur": 14.999, + "args": { + "External id": 466072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453834232.953, "dur": 26.366, + "args": { + "External id": 466073,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453834235.915, "dur": 1.944, + "args": { + "External id": 466074,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453834242.721, "dur": 0.782, + "args": { + "External id": 466075,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453834261.271, "dur": 13.931, + "args": { + "External id": 466076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453834276.596, "dur": 14.518, + "args": { + "External id": 466077,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453834304.134, "dur": 3.485, + "args": { + "External id": 466078,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453834320.945, "dur": 5.745, + "args": { + "External id": 466079,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453834324.995, "dur": 0.650, + "args": { + "External id": 466080,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453834423.016, "dur": 80.910, + "args": { + "External id": 466081,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453834510.677, "dur": 8.947, + "args": { + "External id": 466082,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453834517.288, "dur": 0.951, + "args": { + "External id": 466083,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453834521.586, "dur": 42.005, + "args": { + "External id": 466084,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453834569.804, "dur": 8.685, + "args": { + "External id": 466085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453834571.739, "dur": 5.765, + "args": { + "External id": 466086,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453834574.178, "dur": 3.054, + "args": { + "External id": 466087,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453834582.219, "dur": 85.486, + "args": { + "External id": 466088,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453834586.039, "dur": 80.423, + "args": { + "External id": 466089,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453834674.587, "dur": 22.491, + "args": { + "External id": 466090,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453834705.322, "dur": 9.534, + "args": { + "External id": 466091,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453834712.614, "dur": 0.923, + "args": { + "External id": 466092,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453834720.499, "dur": 59.894, + "args": { + "External id": 466093,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453834721.675, "dur": 6.937, + "args": { + "External id": 466094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453834725.279, "dur": 2.668, + "args": { + "External id": 466095,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453834727.001, "dur": 0.780, + "args": { + "External id": 466096,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453834729.359, "dur": 50.573, + "args": { + "External id": 466097,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453834730.210, "dur": 48.858, + "args": { + "External id": 466098,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453834785.876, "dur": 4.844, + "args": { + "External id": 466099,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453834788.563, "dur": 0.724, + "args": { + "External id": 466100,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453834798.319, "dur": 1.907, + "args": { + "External id": 466101,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453834813.350, "dur": 11.429, + "args": { + "External id": 466102,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453834815.489, "dur": 8.884, + "args": { + "External id": 466103,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453834937.311, "dur": 254.627, + "args": { + "External id": 466104,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453834940.017, "dur": 1.806, + "args": { + "External id": 466105,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453834943.934, "dur": 247.105, + "args": { + "External id": 466106,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453834946.015, "dur": 0.438, + "args": { + "External id": 466107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453834950.430, "dur": 28.736, + "args": { + "External id": 466108,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453834981.196, "dur": 3.732, + "args": { + "External id": 466109,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453834983.570, "dur": 0.989, + "args": { + "External id": 466110,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453834985.914, "dur": 28.769, + "args": { + "External id": 466111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453834987.338, "dur": 1.612, + "args": { + "External id": 466112,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453834990.583, "dur": 23.614, + "args": { + "External id": 466113,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453834996.529, "dur": 3.340, + "args": { + "External id": 466114,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453835016.411, "dur": 22.352, + "args": { + "External id": 466115,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835040.799, "dur": 52.683, + "args": { + "External id": 466116,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453835099.755, "dur": 18.893, + "args": { + "External id": 466117,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835123.166, "dur": 13.444, + "args": { + "External id": 466118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453835139.323, "dur": 22.802, + "args": { + "External id": 466119,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835142.155, "dur": 1.752, + "args": { + "External id": 466120,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453835146.419, "dur": 0.787, + "args": { + "External id": 466121,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835164.104, "dur": 13.078, + "args": { + "External id": 466122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835178.623, "dur": 11.155, + "args": { + "External id": 466123,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453835203.826, "dur": 2.863, + "args": { + "External id": 466124,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453835218.372, "dur": 4.321, + "args": { + "External id": 466125,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453835221.164, "dur": 0.478, + "args": { + "External id": 466126,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453835308.565, "dur": 68.325, + "args": { + "External id": 466127,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453835382.995, "dur": 5.833, + "args": { + "External id": 466128,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453835386.185, "dur": 1.197, + "args": { + "External id": 466129,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835390.771, "dur": 27.215, + "args": { + "External id": 466130,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453835423.630, "dur": 9.179, + "args": { + "External id": 466131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453835428.395, "dur": 3.584, + "args": { + "External id": 466132,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453835431.059, "dur": 0.695, + "args": { + "External id": 466133,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453835435.813, "dur": 49.061, + "args": { + "External id": 466134,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453835437.369, "dur": 46.456, + "args": { + "External id": 466135,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835489.305, "dur": 17.077, + "args": { + "External id": 466136,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453835513.719, "dur": 6.615, + "args": { + "External id": 466137,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453835518.535, "dur": 0.795, + "args": { + "External id": 466138,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453835525.469, "dur": 53.150, + "args": { + "External id": 466139,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453835526.707, "dur": 6.117, + "args": { + "External id": 466140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453835527.405, "dur": 4.735, + "args": { + "External id": 466141,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453835528.816, "dur": 3.148, + "args": { + "External id": 466142,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453835533.483, "dur": 44.406, + "args": { + "External id": 466143,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453835534.559, "dur": 42.420, + "args": { + "External id": 466144,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453835586.235, "dur": 4.301, + "args": { + "External id": 466145,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453835588.695, "dur": 0.504, + "args": { + "External id": 466146,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453835615.676, "dur": 2.553, + "args": { + "External id": 466147,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453835628.371, "dur": 10.516, + "args": { + "External id": 466148,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453835630.519, "dur": 8.039, + "args": { + "External id": 466149,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453835745.845, "dur": 199.594, + "args": { + "External id": 466150,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453835748.779, "dur": 1.734, + "args": { + "External id": 466151,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453835752.496, "dur": 192.461, + "args": { + "External id": 466152,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453835754.215, "dur": 0.721, + "args": { + "External id": 466153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453835758.529, "dur": 23.570, + "args": { + "External id": 466154,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453835784.334, "dur": 4.162, + "args": { + "External id": 466155,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453835787.463, "dur": 0.751, + "args": { + "External id": 466156,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453835789.521, "dur": 28.624, + "args": { + "External id": 466157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453835791.031, "dur": 3.611, + "args": { + "External id": 466158,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453835796.504, "dur": 21.328, + "args": { + "External id": 466159,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835802.769, "dur": 2.624, + "args": { + "External id": 466160,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453835819.999, "dur": 22.720, + "args": { + "External id": 466161,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835844.582, "dur": 14.335, + "args": { + "External id": 466162,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453835861.945, "dur": 14.022, + "args": { + "External id": 466163,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835877.675, "dur": 13.338, + "args": { + "External id": 466164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453835892.764, "dur": 21.245, + "args": { + "External id": 466165,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835895.315, "dur": 1.568, + "args": { + "External id": 466166,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453835899.257, "dur": 0.854, + "args": { + "External id": 466167,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835918.433, "dur": 12.797, + "args": { + "External id": 466168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453835932.435, "dur": 11.053, + "args": { + "External id": 466169,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453835952.559, "dur": 1.887, + "args": { + "External id": 466170,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453835964.394, "dur": 4.833, + "args": { + "External id": 466171,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453835967.166, "dur": 1.043, + "args": { + "External id": 466172,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453836042.290, "dur": 113.164, + "args": { + "External id": 466173,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453836164.310, "dur": 8.499, + "args": { + "External id": 466174,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453836169.285, "dur": 1.308, + "args": { + "External id": 466175,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453836174.586, "dur": 27.898, + "args": { + "External id": 466176,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453836211.349, "dur": 8.953, + "args": { + "External id": 466177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453836213.274, "dur": 5.946, + "args": { + "External id": 466178,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453836215.586, "dur": 3.314, + "args": { + "External id": 466179,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453836223.982, "dur": 51.538, + "args": { + "External id": 466180,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453836225.675, "dur": 48.912, + "args": { + "External id": 466181,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453836280.337, "dur": 16.405, + "args": { + "External id": 466182,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453836303.987, "dur": 7.560, + "args": { + "External id": 466183,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453836309.497, "dur": 0.974, + "args": { + "External id": 466184,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453836316.750, "dur": 55.644, + "args": { + "External id": 466185,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453836317.698, "dur": 6.777, + "args": { + "External id": 466186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453836318.740, "dur": 5.105, + "args": { + "External id": 466187,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453836320.474, "dur": 3.227, + "args": { + "External id": 466188,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453836325.363, "dur": 46.621, + "args": { + "External id": 466189,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453836328.714, "dur": 42.371, + "args": { + "External id": 466190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453836376.838, "dur": 4.254, + "args": { + "External id": 466191,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453836379.201, "dur": 0.609, + "args": { + "External id": 466192,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453836388.161, "dur": 1.681, + "args": { + "External id": 466193,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453836398.880, "dur": 7.145, + "args": { + "External id": 466194,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453836401.114, "dur": 4.616, + "args": { + "External id": 466195,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453836509.734, "dur": 224.301, + "args": { + "External id": 466196,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453836511.934, "dur": 1.950, + "args": { + "External id": 466197,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453836518.221, "dur": 215.145, + "args": { + "External id": 466198,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453836519.895, "dur": 0.437, + "args": { + "External id": 466199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453836521.689, "dur": 22.047, + "args": { + "External id": 466200,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453836545.679, "dur": 5.893, + "args": { + "External id": 466201,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453836548.216, "dur": 3.039, + "args": { + "External id": 466202,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453836552.703, "dur": 26.568, + "args": { + "External id": 466203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453836554.501, "dur": 1.284, + "args": { + "External id": 466204,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453836559.755, "dur": 19.232, + "args": { + "External id": 466205,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453836562.548, "dur": 3.147, + "args": { + "External id": 466206,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453836580.717, "dur": 43.194, + "args": { + "External id": 466207,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453836626.840, "dur": 15.641, + "args": { + "External id": 466208,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453836646.290, "dur": 14.911, + "args": { + "External id": 466209,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453836662.832, "dur": 13.507, + "args": { + "External id": 466210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453836678.135, "dur": 27.057, + "args": { + "External id": 466211,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453836680.721, "dur": 2.014, + "args": { + "External id": 466212,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453836687.933, "dur": 2.692, + "args": { + "External id": 466213,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453836706.820, "dur": 12.831, + "args": { + "External id": 466214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453836720.931, "dur": 11.311, + "args": { + "External id": 466215,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453836743.127, "dur": 2.082, + "args": { + "External id": 466216,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453836755.775, "dur": 4.542, + "args": { + "External id": 466217,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453836758.844, "dur": 0.406, + "args": { + "External id": 466218,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453836838.975, "dur": 63.634, + "args": { + "External id": 466219,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453836917.201, "dur": 11.284, + "args": { + "External id": 466220,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453836926.204, "dur": 0.741, + "args": { + "External id": 466221,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453836930.042, "dur": 25.438, + "args": { + "External id": 466222,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453836960.921, "dur": 8.292, + "args": { + "External id": 466223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453836962.746, "dur": 5.554, + "args": { + "External id": 466224,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453836965.210, "dur": 2.885, + "args": { + "External id": 466225,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453836972.366, "dur": 50.045, + "args": { + "External id": 466226,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453836975.859, "dur": 45.890, + "args": { + "External id": 466227,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453837026.859, "dur": 15.443, + "args": { + "External id": 466228,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453837049.460, "dur": 4.786, + "args": { + "External id": 466229,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453837052.599, "dur": 0.598, + "args": { + "External id": 466230,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453837098.507, "dur": 66.649, + "args": { + "External id": 466231,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453837099.786, "dur": 9.763, + "args": { + "External id": 466232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453837102.620, "dur": 5.955, + "args": { + "External id": 466233,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453837107.162, "dur": 1.149, + "args": { + "External id": 466234,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453837110.276, "dur": 54.460, + "args": { + "External id": 466235,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453837113.707, "dur": 50.435, + "args": { + "External id": 466236,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453837171.105, "dur": 4.426, + "args": { + "External id": 466237,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453837173.747, "dur": 0.523, + "args": { + "External id": 466238,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453837182.756, "dur": 1.710, + "args": { + "External id": 466239,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453837197.203, "dur": 11.297, + "args": { + "External id": 466240,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453837199.290, "dur": 8.875, + "args": { + "External id": 466241,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453837307.262, "dur": 198.841, + "args": { + "External id": 466242,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453837309.942, "dur": 2.298, + "args": { + "External id": 466243,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453837314.418, "dur": 191.206, + "args": { + "External id": 466244,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453837316.127, "dur": 0.435, + "args": { + "External id": 466245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453837318.029, "dur": 27.313, + "args": { + "External id": 466246,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453837349.884, "dur": 3.160, + "args": { + "External id": 466247,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453837351.958, "dur": 0.772, + "args": { + "External id": 466248,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453837354.270, "dur": 25.257, + "args": { + "External id": 466249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453837355.621, "dur": 1.322, + "args": { + "External id": 466250,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453837358.526, "dur": 20.605, + "args": { + "External id": 466251,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453837364.245, "dur": 2.492, + "args": { + "External id": 466252,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453837380.946, "dur": 23.343, + "args": { + "External id": 466253,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453837406.016, "dur": 13.965, + "args": { + "External id": 466254,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453837422.724, "dur": 14.502, + "args": { + "External id": 466255,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453837438.712, "dur": 12.532, + "args": { + "External id": 466256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453837453.046, "dur": 24.146, + "args": { + "External id": 466257,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453837457.875, "dur": 2.075, + "args": { + "External id": 466258,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453837462.202, "dur": 0.517, + "args": { + "External id": 466259,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453837478.991, "dur": 13.739, + "args": { + "External id": 466260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453837493.762, "dur": 10.790, + "args": { + "External id": 466261,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453837513.412, "dur": 1.624, + "args": { + "External id": 466262,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453837524.913, "dur": 3.900, + "args": { + "External id": 466263,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453837527.306, "dur": 0.525, + "args": { + "External id": 466264,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453837619.797, "dur": 68.587, + "args": { + "External id": 466265,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453837695.722, "dur": 6.990, + "args": { + "External id": 466266,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453837700.251, "dur": 0.891, + "args": { + "External id": 466267,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453837704.296, "dur": 26.992, + "args": { + "External id": 466268,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453837737.080, "dur": 12.770, + "args": { + "External id": 466269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453837738.631, "dur": 10.340, + "args": { + "External id": 466270,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453837747.947, "dur": 0.817, + "args": { + "External id": 466271,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453837752.831, "dur": 51.374, + "args": { + "External id": 466272,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453837754.271, "dur": 49.007, + "args": { + "External id": 466273,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453837808.572, "dur": 16.833, + "args": { + "External id": 466274,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453837832.864, "dur": 4.676, + "args": { + "External id": 466275,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453837835.886, "dur": 0.672, + "args": { + "External id": 466276,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453837842.146, "dur": 56.814, + "args": { + "External id": 466277,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453837845.520, "dur": 6.395, + "args": { + "External id": 466278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453837846.374, "dur": 4.867, + "args": { + "External id": 466279,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453837848.351, "dur": 2.702, + "args": { + "External id": 466280,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453837852.769, "dur": 45.702, + "args": { + "External id": 466281,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453837853.417, "dur": 44.108, + "args": { + "External id": 466282,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453837903.535, "dur": 4.320, + "args": { + "External id": 466283,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453837905.959, "dur": 0.491, + "args": { + "External id": 466284,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453837917.858, "dur": 1.607, + "args": { + "External id": 466285,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453837928.757, "dur": 6.502, + "args": { + "External id": 466286,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453837930.879, "dur": 4.093, + "args": { + "External id": 466287,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453838036.490, "dur": 276.516, + "args": { + "External id": 466288,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453838039.681, "dur": 2.157, + "args": { + "External id": 466289,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453838043.436, "dur": 268.804, + "args": { + "External id": 466290,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453838047.475, "dur": 0.450, + "args": { + "External id": 466291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453838052.413, "dur": 70.937, + "args": { + "External id": 466292,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453838126.567, "dur": 3.903, + "args": { + "External id": 466293,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453838129.451, "dur": 0.770, + "args": { + "External id": 466294,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453838131.829, "dur": 27.109, + "args": { + "External id": 466295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453838133.192, "dur": 4.264, + "args": { + "External id": 466296,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453838139.215, "dur": 19.328, + "args": { + "External id": 466297,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453838142.855, "dur": 2.997, + "args": { + "External id": 466298,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453838160.630, "dur": 28.989, + "args": { + "External id": 466299,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453838191.806, "dur": 15.152, + "args": { + "External id": 466300,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453838212.508, "dur": 14.872, + "args": { + "External id": 466301,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453838229.286, "dur": 19.787, + "args": { + "External id": 466302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453838251.252, "dur": 25.890, + "args": { + "External id": 466303,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453838253.703, "dur": 1.537, + "args": { + "External id": 466304,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453838257.780, "dur": 0.729, + "args": { + "External id": 466305,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453838279.024, "dur": 15.835, + "args": { + "External id": 466306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453838298.366, "dur": 12.627, + "args": { + "External id": 466307,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453838322.683, "dur": 2.200, + "args": { + "External id": 466308,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453838336.579, "dur": 5.874, + "args": { + "External id": 466309,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453838339.745, "dur": 1.696, + "args": { + "External id": 466310,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453838428.880, "dur": 72.847, + "args": { + "External id": 466311,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453838508.000, "dur": 5.493, + "args": { + "External id": 466312,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453838511.307, "dur": 0.870, + "args": { + "External id": 466313,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453838515.130, "dur": 27.401, + "args": { + "External id": 466314,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453838551.347, "dur": 6.443, + "args": { + "External id": 466315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453838553.144, "dur": 3.729, + "args": { + "External id": 466316,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453838555.423, "dur": 1.251, + "args": { + "External id": 466317,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453838560.846, "dur": 71.295, + "args": { + "External id": 466318,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453838562.340, "dur": 68.415, + "args": { + "External id": 466319,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453838638.641, "dur": 20.963, + "args": { + "External id": 466320,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453838667.561, "dur": 8.165, + "args": { + "External id": 466321,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453838673.507, "dur": 0.938, + "args": { + "External id": 466322,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453838680.807, "dur": 62.816, + "args": { + "External id": 466323,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453838681.626, "dur": 7.366, + "args": { + "External id": 466324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453838682.943, "dur": 5.304, + "args": { + "External id": 466325,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453838687.282, "dur": 0.824, + "args": { + "External id": 466326,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453838689.787, "dur": 53.268, + "args": { + "External id": 466327,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453838692.892, "dur": 49.378, + "args": { + "External id": 466328,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453838748.572, "dur": 4.152, + "args": { + "External id": 466329,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453838751.039, "dur": 0.391, + "args": { + "External id": 466330,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453838759.573, "dur": 1.548, + "args": { + "External id": 466331,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453838770.224, "dur": 7.308, + "args": { + "External id": 466332,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453838772.512, "dur": 4.678, + "args": { + "External id": 466333,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453838880.838, "dur": 266.579, + "args": { + "External id": 466334,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453838883.537, "dur": 4.381, + "args": { + "External id": 466335,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453838889.866, "dur": 257.142, + "args": { + "External id": 466336,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453838894.317, "dur": 0.630, + "args": { + "External id": 466337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453838896.534, "dur": 26.690, + "args": { + "External id": 466338,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453838925.076, "dur": 9.293, + "args": { + "External id": 466339,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453838931.383, "dur": 2.740, + "args": { + "External id": 466340,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453838936.000, "dur": 26.740, + "args": { + "External id": 466341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453838939.796, "dur": 1.244, + "args": { + "External id": 466342,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453838942.271, "dur": 20.127, + "args": { + "External id": 466343,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453838945.412, "dur": 2.744, + "args": { + "External id": 466344,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453838964.378, "dur": 25.410, + "args": { + "External id": 466345,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453838991.353, "dur": 14.766, + "args": { + "External id": 466346,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453839008.719, "dur": 16.911, + "args": { + "External id": 466347,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839027.333, "dur": 14.242, + "args": { + "External id": 466348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453839043.547, "dur": 67.474, + "args": { + "External id": 466349,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839046.036, "dur": 1.692, + "args": { + "External id": 466350,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839052.555, "dur": 0.362, + "args": { + "External id": 466351,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839114.345, "dur": 15.910, + "args": { + "External id": 466352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839131.725, "dur": 13.917, + "args": { + "External id": 466353,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453839157.030, "dur": 2.952, + "args": { + "External id": 466354,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453839170.996, "dur": 4.938, + "args": { + "External id": 466355,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839174.279, "dur": 0.547, + "args": { + "External id": 466356,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453839255.295, "dur": 69.291, + "args": { + "External id": 466357,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453839330.565, "dur": 11.013, + "args": { + "External id": 466358,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839337.155, "dur": 2.948, + "args": { + "External id": 466359,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839343.400, "dur": 30.391, + "args": { + "External id": 466360,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453839379.056, "dur": 6.228, + "args": { + "External id": 466361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453839380.741, "dur": 3.737, + "args": { + "External id": 466362,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839383.317, "dur": 0.855, + "args": { + "External id": 466363,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453839387.886, "dur": 48.720, + "args": { + "External id": 466364,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453839389.603, "dur": 46.212, + "args": { + "External id": 466365,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839443.596, "dur": 17.761, + "args": { + "External id": 466366,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453839467.032, "dur": 27.751, + "args": { + "External id": 466367,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453839469.778, "dur": 24.544, + "args": { + "External id": 466368,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839475.367, "dur": 1.151, + "args": { + "External id": 466369,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453839500.820, "dur": 31.111, + "args": { + "External id": 466370,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453839502.931, "dur": 28.712, + "args": { + "External id": 466371,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 19463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839508.397, "dur": 4.470, + "args": { + "External id": 466372,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839514.275, "dur": 16.848, + "args": { + "External id": 466373,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453839549.613, "dur": 5.697, + "args": { + "External id": 466374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453839551.834, "dur": 3.129, + "args": { + "External id": 466375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453839556.466, "dur": 1.524, + "args": { + "External id": 466376,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453839557.214, "dur": 0.689, + "args": { + "External id": 466377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839627.861, "dur": 32.610, + "args": { + "External id": 466378,"Sequence number": 5036796, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19470 + } + }, + { + "ph": "s", "id": 247, "pid": 2338707, "tid": 2338707, "ts": 6267453839627.861, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453839667.806, "dur": 8.262, + "args": { + "External id": 466379,"Sequence number": 5036797, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839672.886, "dur": 1.333, + "args": { + "External id": 466380,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267453839682.090, "dur": 6.754, + "args": { + "External id": 466381,"Sequence number": 5036797, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839687.077, "dur": 0.431, + "args": { + "External id": 466382,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453839690.361, "dur": 2.704, + "args": { + "External id": 466383,"Sequence number": 5036797, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839692.056, "dur": 0.293, + "args": { + "External id": 466384,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453839698.176, "dur": 8.583, + "args": { + "External id": 466385,"Sequence number": 5036797, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19477 + } + }, + { + "ph": "s", "id": 246, "pid": 2338707, "tid": 2338707, "ts": 6267453839698.176, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839704.060, "dur": 0.901, + "args": { + "External id": 466386,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453839708.292, "dur": 5.765, + "args": { + "External id": 466387,"Sequence number": 5036798, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19479 + } + }, + { + "ph": "s", "id": 245, "pid": 2338707, "tid": 2338707, "ts": 6267453839708.292, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839712.641, "dur": 0.455, + "args": { + "External id": 466388,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267453839715.281, "dur": 9.204, + "args": { + "External id": 466389,"Sequence number": 5036799, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19481 + } + }, + { + "ph": "s", "id": 244, "pid": 2338707, "tid": 2338707, "ts": 6267453839715.281, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839722.480, "dur": 0.874, + "args": { + "External id": 466390,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453839725.617, "dur": 7.789, + "args": { + "External id": 466391,"Sequence number": 5036800, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19483 + } + }, + { + "ph": "s", "id": 243, "pid": 2338707, "tid": 2338707, "ts": 6267453839725.617, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839728.917, "dur": 3.515, + "args": { + "External id": 466392,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267453839737.992, "dur": 35.880, + "args": { + "External id": 466393,"Sequence number": 5036801, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267453839739.850, "dur": 33.785, + "args": { + "External id": 466394,"Sequence number": 5036801, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453839742.873, "dur": 7.558, + "args": { + "External id": 466395,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453839745.743, "dur": 4.109, + "args": { + "External id": 466396,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839751.864, "dur": 21.040, + "args": { + "External id": 466397,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453839804.904, "dur": 3.907, + "args": { + "External id": 466398,"Sequence number": 5036801, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19490 + } + }, + { + "ph": "s", "id": 242, "pid": 2338707, "tid": 2338707, "ts": 6267453839804.904, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453839814.042, "dur": 3.398, + "args": { + "External id": 466399,"Sequence number": 5036802, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453839855.342, "dur": 45731.636, + "args": { + "External id": 466400,"Sequence number": 5036802, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19492 + } + }, + { + "ph": "s", "id": 241, "pid": 2338707, "tid": 2338707, "ts": 6267453839855.342, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267453839874.735, "dur": 30.146, + "args": { + "External id": 466401,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267453839875.665, "dur": 28.982, + "args": { + "External id": 466402,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453839877.524, "dur": 5.368, + "args": { + "External id": 466403,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453839879.010, "dur": 3.326, + "args": { + "External id": 466404,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839883.921, "dur": 20.176, + "args": { + "External id": 466405,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453839924.419, "dur": 34.208, + "args": { + "External id": 466406,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453839925.759, "dur": 6.877, + "args": { + "External id": 466407,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839928.295, "dur": 3.985, + "args": { + "External id": 466408,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839934.296, "dur": 24.066, + "args": { + "External id": 466409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839939.249, "dur": 18.527, + "args": { + "External id": 466410,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453839963.168, "dur": 29.813, + "args": { + "External id": 466411,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453839964.101, "dur": 5.705, + "args": { + "External id": 466412,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453839966.056, "dur": 3.492, + "args": { + "External id": 466413,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839970.491, "dur": 22.268, + "args": { + "External id": 466414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453839971.279, "dur": 20.943, + "args": { + "External id": 466415,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267453840000.442, "dur": 24.805, + "args": { + "External id": 466416,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453840002.484, "dur": 5.094, + "args": { + "External id": 466417,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453840008.362, "dur": 16.576, + "args": { + "External id": 466418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453840012.295, "dur": 12.302, + "args": { + "External id": 466419,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2338707, + "ts": 6267453840031.207, "dur": 73.502, + "args": { + "External id": 466420,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453840110.386, "dur": 88.151, + "args": { + "External id": 466421,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453840113.364, "dur": 84.688, + "args": { + "External id": 466422,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453840119.257, "dur": 0.908, + "args": { + "External id": 466423,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453840121.728, "dur": 37.329, + "args": { + "External id": 466424,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453840123.860, "dur": 34.911, + "args": { + "External id": 466425,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453840129.852, "dur": 4.048, + "args": { + "External id": 466426,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453840135.041, "dur": 23.239, + "args": { + "External id": 466427,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2338707, + "ts": 6267453840203.598, "dur": 38418.091, + "args": { + "External id": 466428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2338707, + "ts": 6267453840205.671, "dur": 38414.896, + "args": { + "External id": 466429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453878638.419, "dur": 10.479, + "args": { + "External id": 466430,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453878644.377, "dur": 1.304, + "args": { + "External id": 466431,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453878656.013, "dur": 202.829, + "args": { + "External id": 466432,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453878720.103, "dur": 10.188, + "args": { + "External id": 466433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453878724.212, "dur": 3.835, + "args": { + "External id": 466434,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453878726.755, "dur": 0.956, + "args": { + "External id": 466435,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453878734.470, "dur": 123.498, + "args": { + "External id": 466436,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453878737.453, "dur": 119.355, + "args": { + "External id": 466437,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453878865.144, "dur": 6.240, + "args": { + "External id": 466438,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453878868.980, "dur": 0.578, + "args": { + "External id": 466439,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453878883.327, "dur": 3.893, + "args": { + "External id": 466440,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453878899.967, "dur": 11.902, + "args": { + "External id": 466441,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453878905.469, "dur": 6.032, + "args": { + "External id": 466442,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453879129.423, "dur": 280.104, + "args": { + "External id": 466443,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453879135.945, "dur": 4.807, + "args": { + "External id": 466444,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453879143.259, "dur": 265.526, + "args": { + "External id": 466445,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453879146.291, "dur": 0.707, + "args": { + "External id": 466446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453879149.193, "dur": 38.836, + "args": { + "External id": 466447,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453879191.183, "dur": 7.804, + "args": { + "External id": 466448,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453879197.431, "dur": 1.232, + "args": { + "External id": 466449,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453879200.783, "dur": 36.493, + "args": { + "External id": 466450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453879205.937, "dur": 1.387, + "args": { + "External id": 466451,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453879210.411, "dur": 26.554, + "args": { + "External id": 466452,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453879214.891, "dur": 4.730, + "args": { + "External id": 466453,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453879240.384, "dur": 31.143, + "args": { + "External id": 466454,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453879274.971, "dur": 20.025, + "args": { + "External id": 466455,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453879299.816, "dur": 22.036, + "args": { + "External id": 466456,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453879324.845, "dur": 17.537, + "args": { + "External id": 466457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453879344.532, "dur": 28.048, + "args": { + "External id": 466458,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453879347.197, "dur": 1.855, + "args": { + "External id": 466459,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453879353.686, "dur": 0.889, + "args": { + "External id": 466460,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453879374.389, "dur": 17.170, + "args": { + "External id": 466461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453879393.034, "dur": 14.496, + "args": { + "External id": 466462,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453879418.601, "dur": 2.674, + "args": { + "External id": 466463,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453879429.901, "dur": 5.965, + "args": { + "External id": 466464,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453879434.050, "dur": 0.430, + "args": { + "External id": 466465,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453879533.458, "dur": 111.025, + "args": { + "External id": 466466,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453879656.930, "dur": 11.940, + "args": { + "External id": 466467,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453879662.667, "dur": 1.235, + "args": { + "External id": 466468,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453879670.721, "dur": 34.728, + "args": { + "External id": 466469,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453879716.430, "dur": 10.976, + "args": { + "External id": 466470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453879720.046, "dur": 6.248, + "args": { + "External id": 466471,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453879722.427, "dur": 3.529, + "args": { + "External id": 466472,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453879734.456, "dur": 65.719, + "args": { + "External id": 466473,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453879736.044, "dur": 63.168, + "args": { + "External id": 466474,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453879805.500, "dur": 21.130, + "args": { + "External id": 466475,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453879835.011, "dur": 5.856, + "args": { + "External id": 466476,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453879838.807, "dur": 0.723, + "args": { + "External id": 466477,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453879846.161, "dur": 61.246, + "args": { + "External id": 466478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453879847.437, "dur": 7.167, + "args": { + "External id": 466479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453879851.191, "dur": 2.700, + "args": { + "External id": 466480,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453879853.038, "dur": 0.708, + "args": { + "External id": 466481,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453879855.400, "dur": 51.498, + "args": { + "External id": 466482,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453879856.270, "dur": 49.694, + "args": { + "External id": 466483,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453879912.964, "dur": 4.427, + "args": { + "External id": 466484,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453879915.602, "dur": 0.423, + "args": { + "External id": 466485,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453879925.637, "dur": 2.024, + "args": { + "External id": 466486,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453879939.418, "dur": 8.402, + "args": { + "External id": 466487,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453879941.695, "dur": 5.798, + "args": { + "External id": 466488,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453880110.713, "dur": 243.812, + "args": { + "External id": 466489,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453880114.289, "dur": 3.667, + "args": { + "External id": 466490,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453880123.019, "dur": 231.001, + "args": { + "External id": 466491,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453880124.953, "dur": 0.679, + "args": { + "External id": 466492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453880129.925, "dur": 30.100, + "args": { + "External id": 466493,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453880161.645, "dur": 6.502, + "args": { + "External id": 466494,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453880164.636, "dur": 3.088, + "args": { + "External id": 466495,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453880169.505, "dur": 27.221, + "args": { + "External id": 466496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453880170.960, "dur": 1.284, + "args": { + "External id": 466497,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453880174.007, "dur": 22.420, + "args": { + "External id": 466498,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453880177.644, "dur": 2.728, + "args": { + "External id": 466499,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453880198.614, "dur": 31.092, + "args": { + "External id": 466500,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453880231.940, "dur": 18.482, + "args": { + "External id": 466501,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453880253.793, "dur": 17.499, + "args": { + "External id": 466502,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453880272.859, "dur": 15.738, + "args": { + "External id": 466503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453880292.991, "dur": 27.461, + "args": { + "External id": 466504,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453880295.636, "dur": 2.000, + "args": { + "External id": 466505,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453880300.144, "dur": 2.793, + "args": { + "External id": 466506,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453880322.189, "dur": 15.259, + "args": { + "External id": 466507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453880338.853, "dur": 13.952, + "args": { + "External id": 466508,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453880364.313, "dur": 2.497, + "args": { + "External id": 466509,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453880378.292, "dur": 5.964, + "args": { + "External id": 466510,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453880381.609, "dur": 0.444, + "args": { + "External id": 466511,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453880473.097, "dur": 77.314, + "args": { + "External id": 466512,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453880556.581, "dur": 5.780, + "args": { + "External id": 466513,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453880560.066, "dur": 0.748, + "args": { + "External id": 466514,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453880564.189, "dur": 50.715, + "args": { + "External id": 466515,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453880622.913, "dur": 9.899, + "args": { + "External id": 466516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453880625.207, "dur": 6.592, + "args": { + "External id": 466517,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453880629.906, "dur": 1.645, + "args": { + "External id": 466518,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453880636.513, "dur": 59.167, + "args": { + "External id": 466519,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453880637.943, "dur": 57.111, + "args": { + "External id": 466520,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453880700.657, "dur": 21.630, + "args": { + "External id": 466521,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453880730.313, "dur": 4.837, + "args": { + "External id": 466522,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453880733.130, "dur": 0.706, + "args": { + "External id": 466523,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453880740.625, "dur": 66.509, + "args": { + "External id": 466524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453880744.200, "dur": 4.479, + "args": { + "External id": 466525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453880745.493, "dur": 2.440, + "args": { + "External id": 466526,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453880747.246, "dur": 0.527, + "args": { + "External id": 466527,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453880749.443, "dur": 57.191, + "args": { + "External id": 466528,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453880750.235, "dur": 55.699, + "args": { + "External id": 466529,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453880812.611, "dur": 4.454, + "args": { + "External id": 466530,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453880815.149, "dur": 0.657, + "args": { + "External id": 466531,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453880826.742, "dur": 1.912, + "args": { + "External id": 466532,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453880838.746, "dur": 6.608, + "args": { + "External id": 466533,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453880840.779, "dur": 4.282, + "args": { + "External id": 466534,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453880959.046, "dur": 256.055, + "args": { + "External id": 466535,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453880964.271, "dur": 4.418, + "args": { + "External id": 466536,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453880970.659, "dur": 243.994, + "args": { + "External id": 466537,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453880974.836, "dur": 0.594, + "args": { + "External id": 466538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453880976.635, "dur": 26.358, + "args": { + "External id": 466539,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453881005.166, "dur": 5.870, + "args": { + "External id": 466540,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453881010.076, "dur": 0.705, + "args": { + "External id": 466541,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453881011.964, "dur": 24.619, + "args": { + "External id": 466542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453881013.650, "dur": 1.031, + "args": { + "External id": 466543,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453881016.076, "dur": 20.072, + "args": { + "External id": 466544,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881019.195, "dur": 3.249, + "args": { + "External id": 466545,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453881038.095, "dur": 60.116, + "args": { + "External id": 466546,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881101.431, "dur": 20.834, + "args": { + "External id": 466547,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453881128.470, "dur": 15.886, + "args": { + "External id": 466548,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881146.127, "dur": 12.895, + "args": { + "External id": 466549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453881161.416, "dur": 22.690, + "args": { + "External id": 466550,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881164.195, "dur": 2.405, + "args": { + "External id": 466551,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453881168.656, "dur": 0.680, + "args": { + "External id": 466552,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881185.665, "dur": 12.521, + "args": { + "External id": 466553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881202.348, "dur": 11.021, + "args": { + "External id": 466554,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453881224.614, "dur": 3.026, + "args": { + "External id": 466555,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453881239.172, "dur": 5.898, + "args": { + "External id": 466556,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453881242.289, "dur": 0.897, + "args": { + "External id": 466557,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453881329.484, "dur": 68.071, + "args": { + "External id": 466558,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453881403.818, "dur": 5.656, + "args": { + "External id": 466559,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453881407.530, "dur": 0.682, + "args": { + "External id": 466560,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881411.186, "dur": 28.876, + "args": { + "External id": 466561,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453881447.549, "dur": 6.063, + "args": { + "External id": 466562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453881449.550, "dur": 3.293, + "args": { + "External id": 466563,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453881451.583, "dur": 1.056, + "args": { + "External id": 466564,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453881457.036, "dur": 47.596, + "args": { + "External id": 466565,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453881458.459, "dur": 45.397, + "args": { + "External id": 466566,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881509.081, "dur": 16.535, + "args": { + "External id": 466567,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453881532.675, "dur": 7.313, + "args": { + "External id": 466568,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453881538.258, "dur": 0.554, + "args": { + "External id": 466569,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453881544.749, "dur": 69.231, + "args": { + "External id": 466570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453881545.761, "dur": 4.294, + "args": { + "External id": 466571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453881546.921, "dur": 2.491, + "args": { + "External id": 466572,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453881548.511, "dur": 0.752, + "args": { + "External id": 466573,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453881550.668, "dur": 62.695, + "args": { + "External id": 466574,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453881553.943, "dur": 58.097, + "args": { + "External id": 466575,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453881622.116, "dur": 5.567, + "args": { + "External id": 466576,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453881625.182, "dur": 0.801, + "args": { + "External id": 466577,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453881634.864, "dur": 2.123, + "args": { + "External id": 466578,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453881646.652, "dur": 9.864, + "args": { + "External id": 466579,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453881648.812, "dur": 7.369, + "args": { + "External id": 466580,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453881765.543, "dur": 207.966, + "args": { + "External id": 466581,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453881767.923, "dur": 2.707, + "args": { + "External id": 466582,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453881772.434, "dur": 200.599, + "args": { + "External id": 466583,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453881774.206, "dur": 0.429, + "args": { + "External id": 466584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453881776.098, "dur": 30.290, + "args": { + "External id": 466585,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453881808.695, "dur": 3.752, + "args": { + "External id": 466586,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453881811.197, "dur": 0.964, + "args": { + "External id": 466587,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453881813.310, "dur": 28.530, + "args": { + "External id": 466588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453881814.761, "dur": 1.667, + "args": { + "External id": 466589,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453881820.461, "dur": 20.927, + "args": { + "External id": 466590,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881826.077, "dur": 2.605, + "args": { + "External id": 466591,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453881843.615, "dur": 22.682, + "args": { + "External id": 466592,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881868.206, "dur": 14.918, + "args": { + "External id": 466593,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453881885.843, "dur": 14.247, + "args": { + "External id": 466594,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881904.521, "dur": 13.398, + "args": { + "External id": 466595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453881919.841, "dur": 21.542, + "args": { + "External id": 466596,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881922.498, "dur": 2.034, + "args": { + "External id": 466597,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453881926.423, "dur": 0.958, + "args": { + "External id": 466598,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881945.459, "dur": 12.688, + "args": { + "External id": 466599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453881959.499, "dur": 12.271, + "args": { + "External id": 466600,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453881981.342, "dur": 1.762, + "args": { + "External id": 466601,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453881992.625, "dur": 4.212, + "args": { + "External id": 466602,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453881995.226, "dur": 0.609, + "args": { + "External id": 466603,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453882118.699, "dur": 70.955, + "args": { + "External id": 466604,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453882197.241, "dur": 9.737, + "args": { + "External id": 466605,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453882201.926, "dur": 3.239, + "args": { + "External id": 466606,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453882211.328, "dur": 32.471, + "args": { + "External id": 466607,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453882250.148, "dur": 6.417, + "args": { + "External id": 466608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453882252.183, "dur": 3.330, + "args": { + "External id": 466609,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453882254.245, "dur": 1.026, + "args": { + "External id": 466610,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453882259.690, "dur": 49.072, + "args": { + "External id": 466611,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453882261.143, "dur": 46.735, + "args": { + "External id": 466612,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453882313.452, "dur": 17.328, + "args": { + "External id": 466613,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453882339.937, "dur": 4.927, + "args": { + "External id": 466614,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453882343.308, "dur": 0.501, + "args": { + "External id": 466615,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453882349.683, "dur": 54.593, + "args": { + "External id": 466616,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453882350.834, "dur": 4.471, + "args": { + "External id": 466617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453882352.002, "dur": 2.604, + "args": { + "External id": 466618,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453882353.703, "dur": 0.758, + "args": { + "External id": 466619,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453882358.540, "dur": 45.241, + "args": { + "External id": 466620,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453882359.301, "dur": 43.742, + "args": { + "External id": 466621,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453882408.835, "dur": 4.386, + "args": { + "External id": 466622,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453882411.601, "dur": 0.355, + "args": { + "External id": 466623,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453882419.842, "dur": 1.717, + "args": { + "External id": 466624,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453882430.959, "dur": 10.058, + "args": { + "External id": 466625,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453882435.417, "dur": 5.297, + "args": { + "External id": 466626,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453882546.352, "dur": 251.120, + "args": { + "External id": 466627,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453882548.988, "dur": 4.834, + "args": { + "External id": 466628,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453882555.686, "dur": 241.175, + "args": { + "External id": 466629,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453882557.069, "dur": 0.587, + "args": { + "External id": 466630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453882559.148, "dur": 27.092, + "args": { + "External id": 466631,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453882588.244, "dur": 26.696, + "args": { + "External id": 466632,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453882612.617, "dur": 1.545, + "args": { + "External id": 466633,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453882617.938, "dur": 34.827, + "args": { + "External id": 466634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453882622.399, "dur": 2.378, + "args": { + "External id": 466635,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453882626.497, "dur": 25.829, + "args": { + "External id": 466636,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453882630.220, "dur": 3.766, + "args": { + "External id": 466637,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453882654.324, "dur": 24.114, + "args": { + "External id": 466638,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453882680.139, "dur": 16.802, + "args": { + "External id": 466639,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453882699.646, "dur": 16.767, + "args": { + "External id": 466640,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453882718.106, "dur": 14.984, + "args": { + "External id": 466641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453882735.064, "dur": 30.089, + "args": { + "External id": 466642,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453882737.873, "dur": 1.800, + "args": { + "External id": 466643,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453882744.229, "dur": 3.089, + "args": { + "External id": 466644,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453882766.765, "dur": 14.975, + "args": { + "External id": 466645,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453882782.860, "dur": 12.705, + "args": { + "External id": 466646,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453882806.792, "dur": 2.352, + "args": { + "External id": 466647,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453882820.089, "dur": 4.996, + "args": { + "External id": 466648,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453882823.348, "dur": 0.768, + "args": { + "External id": 466649,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453882907.097, "dur": 65.955, + "args": { + "External id": 466650,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453882981.778, "dur": 5.562, + "args": { + "External id": 466651,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453882985.139, "dur": 0.774, + "args": { + "External id": 466652,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453882989.157, "dur": 28.595, + "args": { + "External id": 466653,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453883022.634, "dur": 6.691, + "args": { + "External id": 466654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453883024.407, "dur": 3.833, + "args": { + "External id": 466655,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453883026.541, "dur": 1.474, + "args": { + "External id": 466656,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453883034.985, "dur": 90.473, + "args": { + "External id": 466657,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453883036.035, "dur": 88.085, + "args": { + "External id": 466658,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453883132.647, "dur": 20.234, + "args": { + "External id": 466659,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453883161.268, "dur": 6.376, + "args": { + "External id": 466660,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453883164.973, "dur": 1.107, + "args": { + "External id": 466661,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453883172.747, "dur": 57.417, + "args": { + "External id": 466662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453883174.127, "dur": 7.164, + "args": { + "External id": 466663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453883177.960, "dur": 2.627, + "args": { + "External id": 466664,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453883179.537, "dur": 0.904, + "args": { + "External id": 466665,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453883181.957, "dur": 47.680, + "args": { + "External id": 466666,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453883182.933, "dur": 45.604, + "args": { + "External id": 466667,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453883234.926, "dur": 4.325, + "args": { + "External id": 466668,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453883237.297, "dur": 0.637, + "args": { + "External id": 466669,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453883246.427, "dur": 1.967, + "args": { + "External id": 466670,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453883259.752, "dur": 7.789, + "args": { + "External id": 466671,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453883262.028, "dur": 5.149, + "args": { + "External id": 466672,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453883367.987, "dur": 207.591, + "args": { + "External id": 466673,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453883373.479, "dur": 4.531, + "args": { + "External id": 466674,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453883380.125, "dur": 194.870, + "args": { + "External id": 466675,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453883381.842, "dur": 0.485, + "args": { + "External id": 466676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453883386.057, "dur": 22.375, + "args": { + "External id": 466677,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453883410.071, "dur": 5.636, + "args": { + "External id": 466678,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453883414.733, "dur": 0.669, + "args": { + "External id": 466679,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453883416.688, "dur": 22.750, + "args": { + "External id": 466680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453883418.373, "dur": 1.210, + "args": { + "External id": 466681,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453883420.891, "dur": 18.236, + "args": { + "External id": 466682,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453883423.919, "dur": 2.604, + "args": { + "External id": 466683,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453883441.168, "dur": 22.289, + "args": { + "External id": 466684,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453883465.393, "dur": 17.110, + "args": { + "External id": 466685,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453883485.091, "dur": 14.421, + "args": { + "External id": 466686,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453883501.108, "dur": 14.389, + "args": { + "External id": 466687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453883519.619, "dur": 23.915, + "args": { + "External id": 466688,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453883522.451, "dur": 2.277, + "args": { + "External id": 466689,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453883526.588, "dur": 0.725, + "args": { + "External id": 466690,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453883545.038, "dur": 14.744, + "args": { + "External id": 466691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453883560.952, "dur": 12.825, + "args": { + "External id": 466692,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453883582.648, "dur": 1.521, + "args": { + "External id": 466693,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453883610.651, "dur": 5.274, + "args": { + "External id": 466694,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453883613.882, "dur": 0.665, + "args": { + "External id": 466695,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453883697.895, "dur": 65.841, + "args": { + "External id": 466696,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453883768.937, "dur": 6.172, + "args": { + "External id": 466697,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453883772.922, "dur": 0.770, + "args": { + "External id": 466698,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453883776.828, "dur": 26.789, + "args": { + "External id": 466699,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453883809.360, "dur": 8.613, + "args": { + "External id": 466700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453883811.164, "dur": 5.769, + "args": { + "External id": 466701,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453883815.688, "dur": 1.049, + "args": { + "External id": 466702,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453883821.168, "dur": 46.172, + "args": { + "External id": 466703,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453883822.361, "dur": 44.159, + "args": { + "External id": 466704,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453883871.687, "dur": 16.351, + "args": { + "External id": 466705,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453883894.427, "dur": 4.520, + "args": { + "External id": 466706,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453883897.092, "dur": 0.687, + "args": { + "External id": 466707,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453883903.467, "dur": 50.411, + "args": { + "External id": 466708,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453883907.038, "dur": 3.911, + "args": { + "External id": 466709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453883908.116, "dur": 2.187, + "args": { + "External id": 466710,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453883909.702, "dur": 0.435, + "args": { + "External id": 466711,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453883911.733, "dur": 41.656, + "args": { + "External id": 466712,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453883912.660, "dur": 40.082, + "args": { + "External id": 466713,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453883958.617, "dur": 7.039, + "args": { + "External id": 466714,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453883963.723, "dur": 0.753, + "args": { + "External id": 466715,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453883974.030, "dur": 1.914, + "args": { + "External id": 466716,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453883985.116, "dur": 9.374, + "args": { + "External id": 466717,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453883987.365, "dur": 6.682, + "args": { + "External id": 466718,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453884130.783, "dur": 215.681, + "args": { + "External id": 466719,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453884133.572, "dur": 6.010, + "args": { + "External id": 466720,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453884141.883, "dur": 204.147, + "args": { + "External id": 466721,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453884146.274, "dur": 0.401, + "args": { + "External id": 466722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453884147.938, "dur": 26.852, + "args": { + "External id": 466723,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453884176.895, "dur": 3.777, + "args": { + "External id": 466724,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453884179.403, "dur": 0.994, + "args": { + "External id": 466725,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453884181.799, "dur": 26.030, + "args": { + "External id": 466726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453884183.267, "dur": 1.317, + "args": { + "External id": 466727,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453884185.880, "dur": 21.562, + "args": { + "External id": 466728,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453884191.304, "dur": 3.371, + "args": { + "External id": 466729,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453884209.749, "dur": 22.135, + "args": { + "External id": 466730,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453884233.848, "dur": 16.836, + "args": { + "External id": 466731,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453884256.333, "dur": 15.034, + "args": { + "External id": 466732,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453884272.820, "dur": 14.186, + "args": { + "External id": 466733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453884288.914, "dur": 22.963, + "args": { + "External id": 466734,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453884291.690, "dur": 1.568, + "args": { + "External id": 466735,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453884295.366, "dur": 0.636, + "args": { + "External id": 466736,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453884313.503, "dur": 14.816, + "args": { + "External id": 466737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453884331.862, "dur": 12.969, + "args": { + "External id": 466738,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453884354.708, "dur": 2.365, + "args": { + "External id": 466739,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453884367.337, "dur": 4.605, + "args": { + "External id": 466740,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453884370.238, "dur": 0.641, + "args": { + "External id": 466741,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453884454.300, "dur": 70.663, + "args": { + "External id": 466742,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453884530.442, "dur": 6.096, + "args": { + "External id": 466743,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453884534.070, "dur": 0.926, + "args": { + "External id": 466744,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453884538.231, "dur": 26.675, + "args": { + "External id": 466745,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453884572.302, "dur": 6.195, + "args": { + "External id": 466746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453884574.367, "dur": 3.238, + "args": { + "External id": 466747,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453884576.502, "dur": 0.887, + "args": { + "External id": 466748,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453884581.602, "dur": 73.319, + "args": { + "External id": 466749,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453884583.025, "dur": 70.470, + "args": { + "External id": 466750,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453884661.335, "dur": 17.228, + "args": { + "External id": 466751,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453884686.334, "dur": 7.393, + "args": { + "External id": 466752,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453884691.396, "dur": 0.905, + "args": { + "External id": 466753,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453884698.902, "dur": 56.945, + "args": { + "External id": 466754,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453884700.003, "dur": 6.912, + "args": { + "External id": 466755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453884701.191, "dur": 4.980, + "args": { + "External id": 466756,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453884702.945, "dur": 3.028, + "args": { + "External id": 466757,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453884707.644, "dur": 47.722, + "args": { + "External id": 466758,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453884710.694, "dur": 43.932, + "args": { + "External id": 466759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453884760.545, "dur": 4.327, + "args": { + "External id": 466760,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453884763.170, "dur": 0.365, + "args": { + "External id": 466761,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453884771.885, "dur": 1.903, + "args": { + "External id": 466762,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453884783.360, "dur": 7.319, + "args": { + "External id": 466763,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453884785.721, "dur": 4.637, + "args": { + "External id": 466764,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453884895.647, "dur": 251.033, + "args": { + "External id": 466765,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453884898.432, "dur": 2.206, + "args": { + "External id": 466766,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453884902.804, "dur": 243.305, + "args": { + "External id": 466767,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453884904.142, "dur": 0.462, + "args": { + "External id": 466768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453884908.487, "dur": 25.375, + "args": { + "External id": 466769,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453884935.766, "dur": 3.315, + "args": { + "External id": 466770,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453884937.989, "dur": 0.701, + "args": { + "External id": 466771,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453884940.093, "dur": 28.538, + "args": { + "External id": 466772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453884941.918, "dur": 3.780, + "args": { + "External id": 466773,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453884949.790, "dur": 18.396, + "args": { + "External id": 466774,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453884952.735, "dur": 2.634, + "args": { + "External id": 466775,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453884970.096, "dur": 22.829, + "args": { + "External id": 466776,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453884994.472, "dur": 13.210, + "args": { + "External id": 466777,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453885010.556, "dur": 14.307, + "args": { + "External id": 466778,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885026.945, "dur": 13.457, + "args": { + "External id": 466779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453885042.378, "dur": 67.192, + "args": { + "External id": 466780,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885044.560, "dur": 1.801, + "args": { + "External id": 466781,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885048.731, "dur": 0.550, + "args": { + "External id": 466782,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885115.280, "dur": 14.833, + "args": { + "External id": 466783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885131.427, "dur": 13.109, + "args": { + "External id": 466784,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453885156.924, "dur": 2.635, + "args": { + "External id": 466785,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453885170.182, "dur": 5.240, + "args": { + "External id": 466786,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885173.734, "dur": 0.617, + "args": { + "External id": 466787,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453885257.159, "dur": 69.628, + "args": { + "External id": 466788,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453885332.804, "dur": 5.687, + "args": { + "External id": 466789,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885336.258, "dur": 0.808, + "args": { + "External id": 466790,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885342.247, "dur": 30.373, + "args": { + "External id": 466791,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453885378.294, "dur": 7.044, + "args": { + "External id": 466792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453885380.223, "dur": 4.099, + "args": { + "External id": 466793,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885382.755, "dur": 1.321, + "args": { + "External id": 466794,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453885388.247, "dur": 48.744, + "args": { + "External id": 466795,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453885389.625, "dur": 46.522, + "args": { + "External id": 466796,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885441.422, "dur": 18.521, + "args": { + "External id": 466797,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453885467.979, "dur": 29.702, + "args": { + "External id": 466798,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453885470.645, "dur": 26.604, + "args": { + "External id": 466799,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885478.657, "dur": 0.564, + "args": { + "External id": 466800,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453885503.780, "dur": 31.462, + "args": { + "External id": 466801,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453885505.875, "dur": 29.059, + "args": { + "External id": 466802,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 19894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885510.918, "dur": 4.597, + "args": { + "External id": 466803,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885516.936, "dur": 17.451, + "args": { + "External id": 466804,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453885549.026, "dur": 8.415, + "args": { + "External id": 466805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453885553.719, "dur": 3.184, + "args": { + "External id": 466806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453885558.657, "dur": 1.541, + "args": { + "External id": 466807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453885559.240, "dur": 0.629, + "args": { + "External id": 466808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885624.408, "dur": 31.026, + "args": { + "External id": 466809,"Sequence number": 5036803, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885657.855, "dur": 16.808, + "args": { + "External id": 466810,"Sequence number": 5036804, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19902 + } + }, + { + "ph": "s", "id": 240, "pid": 2338707, "tid": 2338707, "ts": 6267453885657.855, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453885682.018, "dur": 8.850, + "args": { + "External id": 466811,"Sequence number": 5036805, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885687.002, "dur": 1.590, + "args": { + "External id": 466812,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267453885694.002, "dur": 9.067, + "args": { + "External id": 466813,"Sequence number": 5036805, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885700.878, "dur": 0.595, + "args": { + "External id": 466814,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453885704.721, "dur": 3.080, + "args": { + "External id": 466815,"Sequence number": 5036805, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885706.671, "dur": 0.409, + "args": { + "External id": 466816,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453885712.428, "dur": 6.346, + "args": { + "External id": 466817,"Sequence number": 5036805, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19909 + } + }, + { + "ph": "s", "id": 239, "pid": 2338707, "tid": 2338707, "ts": 6267453885712.428, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885716.294, "dur": 0.921, + "args": { + "External id": 466818,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453885722.630, "dur": 4.893, + "args": { + "External id": 466819,"Sequence number": 5036806, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19911 + } + }, + { + "ph": "s", "id": 238, "pid": 2338707, "tid": 2338707, "ts": 6267453885722.630, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885725.938, "dur": 0.598, + "args": { + "External id": 466820,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267453885728.841, "dur": 5.903, + "args": { + "External id": 466821,"Sequence number": 5036807, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19913 + } + }, + { + "ph": "s", "id": 237, "pid": 2338707, "tid": 2338707, "ts": 6267453885728.841, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885732.625, "dur": 0.913, + "args": { + "External id": 466822,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453885735.908, "dur": 8.524, + "args": { + "External id": 466823,"Sequence number": 5036808, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19915 + } + }, + { + "ph": "s", "id": 236, "pid": 2338707, "tid": 2338707, "ts": 6267453885735.908, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885739.736, "dur": 3.785, + "args": { + "External id": 466824,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267453885748.566, "dur": 39.208, + "args": { + "External id": 466825,"Sequence number": 5036809, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267453885753.043, "dur": 34.454, + "args": { + "External id": 466826,"Sequence number": 5036809, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453885756.276, "dur": 8.125, + "args": { + "External id": 466827,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453885759.018, "dur": 4.690, + "args": { + "External id": 466828,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885765.377, "dur": 21.520, + "args": { + "External id": 466829,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453885818.989, "dur": 4.675, + "args": { + "External id": 466830,"Sequence number": 5036809, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19922 + } + }, + { + "ph": "s", "id": 235, "pid": 2338707, "tid": 2338707, "ts": 6267453885818.989, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453885826.484, "dur": 1.224, + "args": { + "External id": 466831,"Sequence number": 5036810, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453885867.259, "dur": 46492.610, + "args": { + "External id": 466832,"Sequence number": 5036810, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19924 + } + }, + { + "ph": "s", "id": 234, "pid": 2338707, "tid": 2338707, "ts": 6267453885867.259, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267453885886.059, "dur": 36.619, + "args": { + "External id": 466833,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267453885892.054, "dur": 30.363, + "args": { + "External id": 466834,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453885896.153, "dur": 5.751, + "args": { + "External id": 466835,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453885897.610, "dur": 3.623, + "args": { + "External id": 466836,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885902.649, "dur": 19.226, + "args": { + "External id": 466837,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453885941.708, "dur": 31.282, + "args": { + "External id": 466838,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453885942.966, "dur": 6.956, + "args": { + "External id": 466839,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885945.339, "dur": 4.210, + "args": { + "External id": 466840,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885951.485, "dur": 21.263, + "args": { + "External id": 466841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885956.722, "dur": 15.608, + "args": { + "External id": 466842,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453885977.167, "dur": 21.343, + "args": { + "External id": 466843,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453885978.193, "dur": 4.742, + "args": { + "External id": 466844,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453885979.672, "dur": 2.992, + "args": { + "External id": 466845,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885983.637, "dur": 14.648, + "args": { + "External id": 466846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453885984.315, "dur": 13.583, + "args": { + "External id": 466847,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267453886005.223, "dur": 24.953, + "args": { + "External id": 466848,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453886007.388, "dur": 3.408, + "args": { + "External id": 466849,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453886011.640, "dur": 18.196, + "args": { + "External id": 466850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453886015.130, "dur": 14.328, + "args": { + "External id": 466851,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2338707, + "ts": 6267453886035.873, "dur": 72.076, + "args": { + "External id": 466852,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453886113.462, "dur": 63.093, + "args": { + "External id": 466853,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453886116.382, "dur": 59.611, + "args": { + "External id": 466854,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453886122.607, "dur": 1.253, + "args": { + "External id": 466855,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453886125.574, "dur": 30.030, + "args": { + "External id": 466856,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453886127.597, "dur": 27.752, + "args": { + "External id": 466857,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453886130.826, "dur": 5.493, + "args": { + "External id": 466858,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453886137.348, "dur": 17.545, + "args": { + "External id": 466859,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2338707, + "ts": 6267453886184.293, "dur": 39277.231, + "args": { + "External id": 466860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2338707, + "ts": 6267453886186.108, "dur": 39273.677, + "args": { + "External id": 466861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453925478.587, "dur": 11.023, + "args": { + "External id": 466862,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453925485.005, "dur": 1.142, + "args": { + "External id": 466863,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453925497.112, "dur": 148.892, + "args": { + "External id": 466864,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453925498.835, "dur": 9.233, + "args": { + "External id": 466865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453925502.601, "dur": 4.235, + "args": { + "External id": 466866,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453925504.935, "dur": 0.756, + "args": { + "External id": 466867,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453925512.065, "dur": 133.004, + "args": { + "External id": 466868,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453925514.148, "dur": 129.458, + "args": { + "External id": 466869,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453925653.075, "dur": 7.505, + "args": { + "External id": 466870,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453925657.446, "dur": 0.640, + "args": { + "External id": 466871,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453925672.254, "dur": 3.496, + "args": { + "External id": 466872,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453925689.812, "dur": 12.903, + "args": { + "External id": 466873,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453925696.029, "dur": 6.333, + "args": { + "External id": 466874,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453925861.736, "dur": 300.610, + "args": { + "External id": 466875,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453925867.190, "dur": 2.535, + "args": { + "External id": 466876,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453925872.034, "dur": 289.816, + "args": { + "External id": 466877,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453925874.569, "dur": 0.681, + "args": { + "External id": 466878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453925876.797, "dur": 37.576, + "args": { + "External id": 466879,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453925916.471, "dur": 4.402, + "args": { + "External id": 466880,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453925919.353, "dur": 1.104, + "args": { + "External id": 466881,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453925922.173, "dur": 38.776, + "args": { + "External id": 466882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453925931.011, "dur": 3.885, + "args": { + "External id": 466883,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453925936.440, "dur": 24.198, + "args": { + "External id": 466884,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453925941.002, "dur": 3.994, + "args": { + "External id": 466885,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453925962.923, "dur": 27.496, + "args": { + "External id": 466886,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453925992.658, "dur": 18.125, + "args": { + "External id": 466887,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453926014.036, "dur": 20.067, + "args": { + "External id": 466888,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453926036.058, "dur": 49.872, + "args": { + "External id": 466889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453926090.823, "dur": 33.553, + "args": { + "External id": 466890,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453926093.897, "dur": 2.508, + "args": { + "External id": 466891,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453926101.423, "dur": 0.940, + "args": { + "External id": 466892,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453926126.615, "dur": 16.576, + "args": { + "External id": 466893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453926144.777, "dur": 15.496, + "args": { + "External id": 466894,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453926173.517, "dur": 2.913, + "args": { + "External id": 466895,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453926185.432, "dur": 5.776, + "args": { + "External id": 466896,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453926189.470, "dur": 0.602, + "args": { + "External id": 466897,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453926287.480, "dur": 93.185, + "args": { + "External id": 466898,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453926389.677, "dur": 8.460, + "args": { + "External id": 466899,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453926394.547, "dur": 0.719, + "args": { + "External id": 466900,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453926400.189, "dur": 36.035, + "args": { + "External id": 466901,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453926443.466, "dur": 8.692, + "args": { + "External id": 466902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453926445.711, "dur": 5.552, + "args": { + "External id": 466903,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453926449.551, "dur": 1.427, + "args": { + "External id": 466904,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453926458.962, "dur": 57.798, + "args": { + "External id": 466905,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453926460.487, "dur": 55.424, + "args": { + "External id": 466906,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453926522.196, "dur": 21.089, + "args": { + "External id": 466907,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453926550.563, "dur": 5.108, + "args": { + "External id": 466908,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453926553.345, "dur": 0.496, + "args": { + "External id": 466909,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453926561.285, "dur": 83.980, + "args": { + "External id": 466910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453926562.518, "dur": 9.479, + "args": { + "External id": 466911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453926566.230, "dur": 5.021, + "args": { + "External id": 466912,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453926570.351, "dur": 0.711, + "args": { + "External id": 466913,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453926572.710, "dur": 71.815, + "args": { + "External id": 466914,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453926573.599, "dur": 69.632, + "args": { + "External id": 466915,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453926651.798, "dur": 6.500, + "args": { + "External id": 466916,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453926654.858, "dur": 0.725, + "args": { + "External id": 466917,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453926669.541, "dur": 2.092, + "args": { + "External id": 466918,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453926683.041, "dur": 8.814, + "args": { + "External id": 466919,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453926686.412, "dur": 5.128, + "args": { + "External id": 466920,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453926809.757, "dur": 232.094, + "args": { + "External id": 466921,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453926812.747, "dur": 3.271, + "args": { + "External id": 466922,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453926817.789, "dur": 223.560, + "args": { + "External id": 466923,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453926824.772, "dur": 0.453, + "args": { + "External id": 466924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453926826.593, "dur": 28.210, + "args": { + "External id": 466925,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453926856.708, "dur": 3.915, + "args": { + "External id": 466926,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453926859.553, "dur": 0.815, + "args": { + "External id": 466927,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453926861.794, "dur": 29.139, + "args": { + "External id": 466928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453926865.512, "dur": 1.472, + "args": { + "External id": 466929,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453926868.647, "dur": 21.970, + "args": { + "External id": 466930,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453926872.523, "dur": 3.330, + "args": { + "External id": 466931,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453926892.636, "dur": 27.739, + "args": { + "External id": 466932,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453926922.209, "dur": 18.370, + "args": { + "External id": 466933,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453926946.545, "dur": 15.832, + "args": { + "External id": 466934,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453926964.348, "dur": 15.850, + "args": { + "External id": 466935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453926982.738, "dur": 26.989, + "args": { + "External id": 466936,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453926985.186, "dur": 2.147, + "args": { + "External id": 466937,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453926990.559, "dur": 0.931, + "args": { + "External id": 466938,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453927011.600, "dur": 15.085, + "args": { + "External id": 466939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453927028.043, "dur": 11.850, + "args": { + "External id": 466940,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453927088.849, "dur": 4.631, + "args": { + "External id": 466941,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453927108.549, "dur": 6.280, + "args": { + "External id": 466942,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453927113.219, "dur": 0.534, + "args": { + "External id": 466943,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453927204.237, "dur": 74.973, + "args": { + "External id": 466944,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453927326.627, "dur": 10.069, + "args": { + "External id": 466945,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453927331.816, "dur": 3.446, + "args": { + "External id": 466946,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453927338.567, "dur": 32.017, + "args": { + "External id": 466947,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453927379.386, "dur": 9.207, + "args": { + "External id": 466948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453927381.212, "dur": 6.309, + "args": { + "External id": 466949,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453927386.337, "dur": 0.932, + "args": { + "External id": 466950,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453927392.034, "dur": 51.436, + "args": { + "External id": 466951,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453927393.207, "dur": 49.572, + "args": { + "External id": 466952,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453927448.642, "dur": 20.518, + "args": { + "External id": 466953,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453927476.011, "dur": 6.676, + "args": { + "External id": 466954,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453927480.898, "dur": 0.737, + "args": { + "External id": 466955,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453927487.967, "dur": 53.361, + "args": { + "External id": 466956,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453927488.874, "dur": 4.271, + "args": { + "External id": 466957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453927489.683, "dur": 2.517, + "args": { + "External id": 466958,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453927491.219, "dur": 0.826, + "args": { + "External id": 466959,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453927493.823, "dur": 46.996, + "args": { + "External id": 466960,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453927496.932, "dur": 43.213, + "args": { + "External id": 466961,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453927546.388, "dur": 4.172, + "args": { + "External id": 466962,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453927548.673, "dur": 0.597, + "args": { + "External id": 466963,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453927558.264, "dur": 2.019, + "args": { + "External id": 466964,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453927569.851, "dur": 11.727, + "args": { + "External id": 466965,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453927572.214, "dur": 9.049, + "args": { + "External id": 466966,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453927717.441, "dur": 215.575, + "args": { + "External id": 466967,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453927721.242, "dur": 3.432, + "args": { + "External id": 466968,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453927727.028, "dur": 205.617, + "args": { + "External id": 466969,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453927728.984, "dur": 0.370, + "args": { + "External id": 466970,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453927730.543, "dur": 33.501, + "args": { + "External id": 466971,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453927766.021, "dur": 3.241, + "args": { + "External id": 466972,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453927768.263, "dur": 0.733, + "args": { + "External id": 466973,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453927770.618, "dur": 31.012, + "args": { + "External id": 466974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453927771.979, "dur": 1.591, + "args": { + "External id": 466975,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453927775.240, "dur": 26.036, + "args": { + "External id": 466976,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453927783.448, "dur": 3.186, + "args": { + "External id": 466977,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453927803.371, "dur": 23.599, + "args": { + "External id": 466978,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453927829.415, "dur": 14.980, + "args": { + "External id": 466979,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453927847.397, "dur": 15.431, + "args": { + "External id": 466980,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453927864.418, "dur": 12.853, + "args": { + "External id": 466981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453927879.427, "dur": 21.727, + "args": { + "External id": 466982,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453927882.189, "dur": 1.496, + "args": { + "External id": 466983,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453927885.843, "dur": 0.685, + "args": { + "External id": 466984,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453927904.745, "dur": 13.394, + "args": { + "External id": 466985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453927919.332, "dur": 12.022, + "args": { + "External id": 466986,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453927941.303, "dur": 2.609, + "args": { + "External id": 466987,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453927955.004, "dur": 4.078, + "args": { + "External id": 466988,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453927957.609, "dur": 0.514, + "args": { + "External id": 466989,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453928032.855, "dur": 112.887, + "args": { + "External id": 466990,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453928154.553, "dur": 10.811, + "args": { + "External id": 466991,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453928161.840, "dur": 1.480, + "args": { + "External id": 466992,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453928166.982, "dur": 34.807, + "args": { + "External id": 466993,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453928207.910, "dur": 6.675, + "args": { + "External id": 466994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453928209.731, "dur": 3.828, + "args": { + "External id": 466995,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453928212.449, "dur": 0.880, + "args": { + "External id": 466996,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453928217.637, "dur": 52.235, + "args": { + "External id": 466997,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453928218.749, "dur": 50.225, + "args": { + "External id": 466998,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453928276.215, "dur": 17.755, + "args": { + "External id": 466999,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453928301.121, "dur": 4.109, + "args": { + "External id": 467000,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453928303.536, "dur": 0.756, + "args": { + "External id": 467001,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453928310.275, "dur": 56.531, + "args": { + "External id": 467002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453928311.387, "dur": 9.020, + "args": { + "External id": 467003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453928312.360, "dur": 7.316, + "args": { + "External id": 467004,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453928316.236, "dur": 3.030, + "args": { + "External id": 467005,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453928321.223, "dur": 45.095, + "args": { + "External id": 467006,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453928322.411, "dur": 43.267, + "args": { + "External id": 467007,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453928371.263, "dur": 4.503, + "args": { + "External id": 467008,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453928373.712, "dur": 0.805, + "args": { + "External id": 467009,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453928383.183, "dur": 2.012, + "args": { + "External id": 467010,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453928397.390, "dur": 9.577, + "args": { + "External id": 467011,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453928399.510, "dur": 7.097, + "args": { + "External id": 467012,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453928507.792, "dur": 237.875, + "args": { + "External id": 467013,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453928510.498, "dur": 1.962, + "args": { + "External id": 467014,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453928514.490, "dur": 230.716, + "args": { + "External id": 467015,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453928516.050, "dur": 0.532, + "args": { + "External id": 467016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453928521.214, "dur": 23.879, + "args": { + "External id": 467017,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453928550.104, "dur": 3.564, + "args": { + "External id": 467018,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453928552.562, "dur": 0.894, + "args": { + "External id": 467019,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453928554.751, "dur": 27.513, + "args": { + "External id": 467020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453928556.590, "dur": 2.958, + "args": { + "External id": 467021,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453928560.975, "dur": 20.823, + "args": { + "External id": 467022,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453928564.170, "dur": 2.795, + "args": { + "External id": 467023,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453928583.666, "dur": 43.938, + "args": { + "External id": 467024,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453928630.863, "dur": 17.232, + "args": { + "External id": 467025,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453928651.159, "dur": 15.715, + "args": { + "External id": 467026,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453928668.434, "dur": 15.268, + "args": { + "External id": 467027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453928685.826, "dur": 28.351, + "args": { + "External id": 467028,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453928691.401, "dur": 2.436, + "args": { + "External id": 467029,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453928695.962, "dur": 0.713, + "args": { + "External id": 467030,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453928715.858, "dur": 14.688, + "args": { + "External id": 467031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453928731.624, "dur": 12.130, + "args": { + "External id": 467032,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453928754.736, "dur": 2.578, + "args": { + "External id": 467033,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453928767.635, "dur": 4.561, + "args": { + "External id": 467034,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453928770.707, "dur": 0.575, + "args": { + "External id": 467035,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453928851.625, "dur": 65.833, + "args": { + "External id": 467036,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453928925.738, "dur": 5.376, + "args": { + "External id": 467037,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453928928.795, "dur": 0.949, + "args": { + "External id": 467038,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453928933.044, "dur": 26.701, + "args": { + "External id": 467039,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453928964.956, "dur": 8.081, + "args": { + "External id": 467040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453928966.824, "dur": 5.392, + "args": { + "External id": 467041,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453928968.844, "dur": 3.129, + "args": { + "External id": 467042,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453928978.526, "dur": 48.926, + "args": { + "External id": 467043,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453928979.896, "dur": 46.895, + "args": { + "External id": 467044,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453929031.401, "dur": 18.151, + "args": { + "External id": 467045,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453929092.291, "dur": 7.645, + "args": { + "External id": 467046,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453929097.335, "dur": 0.988, + "args": { + "External id": 467047,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453929106.351, "dur": 64.461, + "args": { + "External id": 467048,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453929107.252, "dur": 7.244, + "args": { + "External id": 467049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453929111.094, "dur": 2.711, + "args": { + "External id": 467050,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453929112.679, "dur": 0.962, + "args": { + "External id": 467051,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453929115.193, "dur": 55.230, + "args": { + "External id": 467052,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453929116.365, "dur": 53.227, + "args": { + "External id": 467053,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453929177.209, "dur": 4.383, + "args": { + "External id": 467054,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453929179.521, "dur": 0.718, + "args": { + "External id": 467055,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453929188.438, "dur": 1.775, + "args": { + "External id": 467056,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453929202.482, "dur": 7.711, + "args": { + "External id": 467057,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453929204.631, "dur": 5.266, + "args": { + "External id": 467058,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453929316.685, "dur": 230.845, + "args": { + "External id": 467059,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453929319.098, "dur": 2.246, + "args": { + "External id": 467060,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453929326.272, "dur": 220.847, + "args": { + "External id": 467061,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453929327.681, "dur": 0.446, + "args": { + "External id": 467062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453929329.168, "dur": 26.107, + "args": { + "External id": 467063,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453929359.336, "dur": 5.536, + "args": { + "External id": 467064,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453929361.561, "dur": 3.020, + "args": { + "External id": 467065,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453929365.790, "dur": 25.829, + "args": { + "External id": 467066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453929367.424, "dur": 1.380, + "args": { + "External id": 467067,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453929370.174, "dur": 21.077, + "args": { + "External id": 467068,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453929373.450, "dur": 3.616, + "args": { + "External id": 467069,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453929393.260, "dur": 26.653, + "args": { + "External id": 467070,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453929421.512, "dur": 15.944, + "args": { + "External id": 467071,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453929448.066, "dur": 19.823, + "args": { + "External id": 467072,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453929469.281, "dur": 14.561, + "args": { + "External id": 467073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453929488.281, "dur": 26.256, + "args": { + "External id": 467074,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453929493.463, "dur": 1.423, + "args": { + "External id": 467075,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453929497.524, "dur": 0.924, + "args": { + "External id": 467076,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453929515.823, "dur": 15.114, + "args": { + "External id": 467077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453929531.995, "dur": 13.898, + "args": { + "External id": 467078,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453929555.503, "dur": 2.077, + "args": { + "External id": 467079,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453929571.236, "dur": 4.408, + "args": { + "External id": 467080,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453929573.961, "dur": 0.678, + "args": { + "External id": 467081,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453929676.416, "dur": 73.444, + "args": { + "External id": 467082,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453929756.460, "dur": 5.924, + "args": { + "External id": 467083,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453929759.819, "dur": 0.950, + "args": { + "External id": 467084,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453929764.426, "dur": 32.235, + "args": { + "External id": 467085,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453929802.671, "dur": 9.282, + "args": { + "External id": 467086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453929804.567, "dur": 6.345, + "args": { + "External id": 467087,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453929809.489, "dur": 1.170, + "args": { + "External id": 467088,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453929815.289, "dur": 48.661, + "args": { + "External id": 467089,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453929816.289, "dur": 46.875, + "args": { + "External id": 467090,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453929868.731, "dur": 18.718, + "args": { + "External id": 467091,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453929893.874, "dur": 3.968, + "args": { + "External id": 467092,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453929896.327, "dur": 0.482, + "args": { + "External id": 467093,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453929908.421, "dur": 50.931, + "args": { + "External id": 467094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453929909.284, "dur": 3.694, + "args": { + "External id": 467095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453929909.904, "dur": 2.417, + "args": { + "External id": 467096,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453929911.344, "dur": 0.805, + "args": { + "External id": 467097,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453929913.671, "dur": 45.203, + "args": { + "External id": 467098,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453929914.205, "dur": 43.864, + "args": { + "External id": 467099,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453929966.038, "dur": 4.547, + "args": { + "External id": 467100,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453929968.676, "dur": 0.468, + "args": { + "External id": 467101,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453929976.738, "dur": 1.759, + "args": { + "External id": 467102,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453929987.824, "dur": 11.640, + "args": { + "External id": 467103,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453929989.761, "dur": 9.365, + "args": { + "External id": 467104,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453930148.956, "dur": 219.610, + "args": { + "External id": 467105,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453930152.242, "dur": 3.818, + "args": { + "External id": 467106,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453930158.223, "dur": 209.706, + "args": { + "External id": 467107,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453930160.044, "dur": 0.413, + "args": { + "External id": 467108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453930161.693, "dur": 29.515, + "args": { + "External id": 467109,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453930193.288, "dur": 3.473, + "args": { + "External id": 467110,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453930195.592, "dur": 0.911, + "args": { + "External id": 467111,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453930197.788, "dur": 32.155, + "args": { + "External id": 467112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453930199.609, "dur": 1.744, + "args": { + "External id": 467113,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453930202.741, "dur": 26.755, + "args": { + "External id": 467114,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453930211.205, "dur": 3.521, + "args": { + "External id": 467115,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453930231.824, "dur": 24.334, + "args": { + "External id": 467116,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453930257.895, "dur": 16.120, + "args": { + "External id": 467117,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453930276.906, "dur": 15.789, + "args": { + "External id": 467118,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453930294.246, "dur": 13.982, + "args": { + "External id": 467119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453930310.039, "dur": 24.123, + "args": { + "External id": 467120,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453930312.568, "dur": 1.985, + "args": { + "External id": 467121,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453930316.825, "dur": 0.806, + "args": { + "External id": 467122,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453930338.486, "dur": 14.287, + "args": { + "External id": 467123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453930353.859, "dur": 12.763, + "args": { + "External id": 467124,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453930376.550, "dur": 2.512, + "args": { + "External id": 467125,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453930389.883, "dur": 4.074, + "args": { + "External id": 467126,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453930392.252, "dur": 0.567, + "args": { + "External id": 467127,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453930473.959, "dur": 67.502, + "args": { + "External id": 467128,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453930547.239, "dur": 6.027, + "args": { + "External id": 467129,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453930551.030, "dur": 0.948, + "args": { + "External id": 467130,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453930555.105, "dur": 30.511, + "args": { + "External id": 467131,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453930609.656, "dur": 9.283, + "args": { + "External id": 467132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453930613.735, "dur": 4.073, + "args": { + "External id": 467133,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453930616.349, "dur": 1.074, + "args": { + "External id": 467134,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453930622.910, "dur": 57.643, + "args": { + "External id": 467135,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453930624.313, "dur": 55.533, + "args": { + "External id": 467136,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453930685.370, "dur": 19.590, + "args": { + "External id": 467137,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453930711.835, "dur": 6.970, + "args": { + "External id": 467138,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453930717.070, "dur": 0.546, + "args": { + "External id": 467139,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453930723.752, "dur": 55.928, + "args": { + "External id": 467140,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453930724.735, "dur": 6.253, + "args": { + "External id": 467141,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453930725.619, "dur": 4.678, + "args": { + "External id": 467142,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453930727.221, "dur": 2.872, + "args": { + "External id": 467143,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453930731.676, "dur": 47.300, + "args": { + "External id": 467144,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453930734.916, "dur": 43.254, + "args": { + "External id": 467145,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453930784.733, "dur": 6.875, + "args": { + "External id": 467146,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453930787.255, "dur": 3.002, + "args": { + "External id": 467147,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453930799.648, "dur": 1.716, + "args": { + "External id": 467148,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453930810.556, "dur": 6.330, + "args": { + "External id": 467149,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453930812.475, "dur": 4.115, + "args": { + "External id": 467150,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453930916.497, "dur": 252.249, + "args": { + "External id": 467151,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453930919.222, "dur": 1.804, + "args": { + "External id": 467152,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453930923.210, "dur": 245.028, + "args": { + "External id": 467153,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453930924.624, "dur": 0.431, + "args": { + "External id": 467154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453930928.810, "dur": 27.487, + "args": { + "External id": 467155,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453930958.015, "dur": 3.112, + "args": { + "External id": 467156,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453930960.376, "dur": 0.510, + "args": { + "External id": 467157,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453930962.079, "dur": 28.826, + "args": { + "External id": 467158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453930963.376, "dur": 3.698, + "args": { + "External id": 467159,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453930968.278, "dur": 22.145, + "args": { + "External id": 467160,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453930973.571, "dur": 3.499, + "args": { + "External id": 467161,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453930992.389, "dur": 25.878, + "args": { + "External id": 467162,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931020.077, "dur": 14.663, + "args": { + "External id": 467163,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453931037.743, "dur": 16.313, + "args": { + "External id": 467164,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931092.380, "dur": 17.963, + "args": { + "External id": 467165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453931113.631, "dur": 22.453, + "args": { + "External id": 467166,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931116.071, "dur": 2.108, + "args": { + "External id": 467167,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453931121.018, "dur": 0.767, + "args": { + "External id": 467168,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931140.868, "dur": 12.722, + "args": { + "External id": 467169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931154.674, "dur": 12.398, + "args": { + "External id": 467170,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453931178.178, "dur": 2.557, + "args": { + "External id": 467171,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453931192.511, "dur": 4.493, + "args": { + "External id": 467172,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453931195.521, "dur": 0.419, + "args": { + "External id": 467173,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453931273.788, "dur": 65.419, + "args": { + "External id": 467174,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453931345.355, "dur": 7.585, + "args": { + "External id": 467175,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453931350.745, "dur": 0.793, + "args": { + "External id": 467176,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931354.726, "dur": 27.699, + "args": { + "External id": 467177,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453931387.198, "dur": 6.226, + "args": { + "External id": 467178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453931389.015, "dur": 3.583, + "args": { + "External id": 467179,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453931391.168, "dur": 1.204, + "args": { + "External id": 467180,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453931396.248, "dur": 53.489, + "args": { + "External id": 467181,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453931397.545, "dur": 51.523, + "args": { + "External id": 467182,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931456.311, "dur": 15.214, + "args": { + "External id": 467183,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453931478.372, "dur": 3.933, + "args": { + "External id": 467184,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453931480.715, "dur": 0.430, + "args": { + "External id": 467185,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453931486.919, "dur": 55.001, + "args": { + "External id": 467186,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453931488.019, "dur": 9.559, + "args": { + "External id": 467187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453931489.160, "dur": 7.720, + "args": { + "External id": 467188,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453931496.077, "dur": 0.650, + "args": { + "External id": 467189,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453931498.796, "dur": 42.627, + "args": { + "External id": 467190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453931499.547, "dur": 41.077, + "args": { + "External id": 467191,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453931546.118, "dur": 4.357, + "args": { + "External id": 467192,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453931548.412, "dur": 0.580, + "args": { + "External id": 467193,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453931560.066, "dur": 1.803, + "args": { + "External id": 467194,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453931573.459, "dur": 7.240, + "args": { + "External id": 467195,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453931575.688, "dur": 4.652, + "args": { + "External id": 467196,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453931695.686, "dur": 203.547, + "args": { + "External id": 467197,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453931701.973, "dur": 5.194, + "args": { + "External id": 467198,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453931709.157, "dur": 189.496, + "args": { + "External id": 467199,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453931710.711, "dur": 0.502, + "args": { + "External id": 467200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453931712.594, "dur": 24.436, + "args": { + "External id": 467201,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453931741.372, "dur": 5.953, + "args": { + "External id": 467202,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453931746.248, "dur": 0.772, + "args": { + "External id": 467203,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453931748.280, "dur": 22.742, + "args": { + "External id": 467204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453931749.542, "dur": 1.260, + "args": { + "External id": 467205,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453931752.335, "dur": 18.227, + "args": { + "External id": 467206,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931755.422, "dur": 2.485, + "args": { + "External id": 467207,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453931772.559, "dur": 22.400, + "args": { + "External id": 467208,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931796.460, "dur": 17.131, + "args": { + "External id": 467209,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453931816.335, "dur": 13.396, + "args": { + "External id": 467210,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931831.414, "dur": 12.591, + "args": { + "External id": 467211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453931847.270, "dur": 23.676, + "args": { + "External id": 467212,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931852.252, "dur": 1.901, + "args": { + "External id": 467213,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453931856.589, "dur": 0.579, + "args": { + "External id": 467214,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931872.648, "dur": 12.180, + "args": { + "External id": 467215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453931885.813, "dur": 11.483, + "args": { + "External id": 467216,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453931906.491, "dur": 2.125, + "args": { + "External id": 467217,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453931918.429, "dur": 4.293, + "args": { + "External id": 467218,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453931921.260, "dur": 0.467, + "args": { + "External id": 467219,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453931993.118, "dur": 61.947, + "args": { + "External id": 467220,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453932103.493, "dur": 7.719, + "args": { + "External id": 467221,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932107.688, "dur": 1.432, + "args": { + "External id": 467222,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932112.781, "dur": 33.714, + "args": { + "External id": 467223,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453932152.087, "dur": 7.319, + "args": { + "External id": 467224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453932154.018, "dur": 4.537, + "args": { + "External id": 467225,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932156.694, "dur": 1.642, + "args": { + "External id": 467226,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453932164.976, "dur": 54.783, + "args": { + "External id": 467227,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453932166.304, "dur": 52.822, + "args": { + "External id": 467228,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932223.714, "dur": 16.774, + "args": { + "External id": 467229,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453932245.764, "dur": 29.027, + "args": { + "External id": 467230,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453932248.471, "dur": 25.768, + "args": { + "External id": 467231,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932254.959, "dur": 2.726, + "args": { + "External id": 467232,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453932281.065, "dur": 29.444, + "args": { + "External id": 467233,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453932283.360, "dur": 26.872, + "args": { + "External id": 467234,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 20326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932290.406, "dur": 3.783, + "args": { + "External id": 467235,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932295.521, "dur": 14.218, + "args": { + "External id": 467236,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453932322.645, "dur": 5.546, + "args": { + "External id": 467237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453932324.860, "dur": 3.008, + "args": { + "External id": 467238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453932329.575, "dur": 1.659, + "args": { + "External id": 467239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453932330.350, "dur": 0.521, + "args": { + "External id": 467240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932381.614, "dur": 25.844, + "args": { + "External id": 467241,"Sequence number": 5036811, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932409.677, "dur": 14.937, + "args": { + "External id": 467242,"Sequence number": 5036812, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20334 + } + }, + { + "ph": "s", "id": 233, "pid": 2338707, "tid": 2338707, "ts": 6267453932409.677, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453932431.636, "dur": 7.359, + "args": { + "External id": 467243,"Sequence number": 5036813, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 20335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932436.204, "dur": 1.087, + "args": { + "External id": 467244,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 20336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267453932442.234, "dur": 6.761, + "args": { + "External id": 467245,"Sequence number": 5036813, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 20337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932446.981, "dur": 0.629, + "args": { + "External id": 467246,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 20338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453932450.438, "dur": 5.086, + "args": { + "External id": 467247,"Sequence number": 5036813, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 20339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932454.386, "dur": 0.438, + "args": { + "External id": 467248,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 20340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453932460.290, "dur": 6.902, + "args": { + "External id": 467249,"Sequence number": 5036813, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 20341 + } + }, + { + "ph": "s", "id": 232, "pid": 2338707, "tid": 2338707, "ts": 6267453932460.290, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932464.680, "dur": 0.976, + "args": { + "External id": 467250,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453932468.279, "dur": 5.355, + "args": { + "External id": 467251,"Sequence number": 5036814, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 20343 + } + }, + { + "ph": "s", "id": 231, "pid": 2338707, "tid": 2338707, "ts": 6267453932468.279, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932472.168, "dur": 0.520, + "args": { + "External id": 467252,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338707, "tid": 2338707, + "ts": 6267453932477.343, "dur": 6.256, + "args": { + "External id": 467253,"Sequence number": 5036815, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 20345 + } + }, + { + "ph": "s", "id": 230, "pid": 2338707, "tid": 2338707, "ts": 6267453932477.343, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932481.912, "dur": 0.470, + "args": { + "External id": 467254,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453932484.898, "dur": 5.495, + "args": { + "External id": 467255,"Sequence number": 5036816, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 20347 + } + }, + { + "ph": "s", "id": 229, "pid": 2338707, "tid": 2338707, "ts": 6267453932484.898, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932488.545, "dur": 0.987, + "args": { + "External id": 467256,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 20348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267453932494.802, "dur": 39.234, + "args": { + "External id": 467257,"Sequence number": 5036817, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267453932496.511, "dur": 37.224, + "args": { + "External id": 467258,"Sequence number": 5036817, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453932499.565, "dur": 11.507, + "args": { + "External id": 467259,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 20351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453932504.430, "dur": 5.995, + "args": { + "External id": 467260,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932515.112, "dur": 17.920, + "args": { + "External id": 467261,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 20353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453932564.575, "dur": 4.741, + "args": { + "External id": 467262,"Sequence number": 5036817, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 20354 + } + }, + { + "ph": "s", "id": 228, "pid": 2338707, "tid": 2338707, "ts": 6267453932564.575, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453932572.040, "dur": 1.489, + "args": { + "External id": 467263,"Sequence number": 5036818, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338707, "tid": 2338707, + "ts": 6267453932630.049, "dur": 46582.608, + "args": { + "External id": 467264,"Sequence number": 5036818, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 20356 + } + }, + { + "ph": "s", "id": 227, "pid": 2338707, "tid": 2338707, "ts": 6267453932630.049, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338707, "tid": 2338707, + "ts": 6267453932650.767, "dur": 38.155, + "args": { + "External id": 467265,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267453932651.540, "dur": 37.104, + "args": { + "External id": 467266,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453932653.986, "dur": 9.534, + "args": { + "External id": 467267,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453932657.831, "dur": 4.906, + "args": { + "External id": 467268,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932664.455, "dur": 23.598, + "args": { + "External id": 467269,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 20361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453932708.583, "dur": 30.424, + "args": { + "External id": 467270,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453932709.945, "dur": 6.150, + "args": { + "External id": 467271,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932712.010, "dur": 3.775, + "args": { + "External id": 467272,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932717.847, "dur": 20.918, + "args": { + "External id": 467273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932719.831, "dur": 18.511, + "args": { + "External id": 467274,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453932743.235, "dur": 26.540, + "args": { + "External id": 467275,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453932744.189, "dur": 5.111, + "args": { + "External id": 467276,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932745.991, "dur": 2.997, + "args": { + "External id": 467277,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932752.201, "dur": 17.353, + "args": { + "External id": 467278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932753.069, "dur": 15.981, + "args": { + "External id": 467279,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 20371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267453932777.230, "dur": 19.821, + "args": { + "External id": 467280,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 20372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453932779.225, "dur": 2.805, + "args": { + "External id": 467281,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932782.842, "dur": 13.865, + "args": { + "External id": 467282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 20374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932784.116, "dur": 12.235, + "args": { + "External id": 467283,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2338707, + "ts": 6267453932802.176, "dur": 25.674, + "args": { + "External id": 467284,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453932831.111, "dur": 59.201, + "args": { + "External id": 467285,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453932836.520, "dur": 53.389, + "args": { + "External id": 467286,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932841.239, "dur": 2.877, + "args": { + "External id": 467287,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453932845.852, "dur": 26.388, + "args": { + "External id": 467288,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453932847.769, "dur": 24.208, + "args": { + "External id": 467289,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 20381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453932850.764, "dur": 3.566, + "args": { + "External id": 467290,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453932855.142, "dur": 16.467, + "args": { + "External id": 467291,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 20383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2338707, + "ts": 6267453932895.382, "dur": 39321.740, + "args": { + "External id": 467292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2338707, + "ts": 6267453932897.319, "dur": 39318.216, + "args": { + "External id": 467293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453972230.793, "dur": 10.577, + "args": { + "External id": 467294,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453972236.788, "dur": 1.419, + "args": { + "External id": 467295,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453972251.576, "dur": 135.603, + "args": { + "External id": 467296,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453972254.449, "dur": 8.233, + "args": { + "External id": 467297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453972257.206, "dur": 3.432, + "args": { + "External id": 467298,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453972259.526, "dur": 0.738, + "args": { + "External id": 467299,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453972264.375, "dur": 121.627, + "args": { + "External id": 467300,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453972266.302, "dur": 118.638, + "args": { + "External id": 467301,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453972392.932, "dur": 6.152, + "args": { + "External id": 467302,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453972396.569, "dur": 0.791, + "args": { + "External id": 467303,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453972410.771, "dur": 3.775, + "args": { + "External id": 467304,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453972427.295, "dur": 8.763, + "args": { + "External id": 467305,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453972430.696, "dur": 4.963, + "args": { + "External id": 467306,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453972632.558, "dur": 280.626, + "args": { + "External id": 467307,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453972638.385, "dur": 4.832, + "args": { + "External id": 467308,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453972648.916, "dur": 262.442, + "args": { + "External id": 467309,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453972650.847, "dur": 0.511, + "args": { + "External id": 467310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453972654.086, "dur": 36.166, + "args": { + "External id": 467311,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453972693.575, "dur": 6.885, + "args": { + "External id": 467312,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453972699.126, "dur": 0.913, + "args": { + "External id": 467313,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453972701.719, "dur": 33.522, + "args": { + "External id": 467314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453972703.050, "dur": 2.396, + "args": { + "External id": 467315,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453972707.096, "dur": 27.801, + "args": { + "External id": 467316,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453972712.583, "dur": 5.107, + "args": { + "External id": 467317,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453972741.160, "dur": 29.561, + "args": { + "External id": 467318,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453972774.210, "dur": 22.615, + "args": { + "External id": 467319,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453972801.682, "dur": 19.416, + "args": { + "External id": 467320,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453972823.002, "dur": 18.080, + "args": { + "External id": 467321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453972844.526, "dur": 27.690, + "args": { + "External id": 467322,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453972847.520, "dur": 1.973, + "args": { + "External id": 467323,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453972852.686, "dur": 0.726, + "args": { + "External id": 467324,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453972877.686, "dur": 16.413, + "args": { + "External id": 467325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453972896.371, "dur": 13.475, + "args": { + "External id": 467326,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453972923.930, "dur": 3.010, + "args": { + "External id": 467327,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453972936.343, "dur": 5.600, + "args": { + "External id": 467328,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453972940.287, "dur": 0.463, + "args": { + "External id": 467329,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453973051.348, "dur": 136.250, + "args": { + "External id": 467330,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453973197.916, "dur": 12.762, + "args": { + "External id": 467331,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453973204.126, "dur": 1.412, + "args": { + "External id": 467332,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453973215.252, "dur": 34.234, + "args": { + "External id": 467333,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453973256.887, "dur": 8.893, + "args": { + "External id": 467334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453973259.365, "dur": 5.235, + "args": { + "External id": 467335,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453973262.794, "dur": 1.497, + "args": { + "External id": 467336,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453973270.389, "dur": 57.161, + "args": { + "External id": 467337,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453973271.995, "dur": 55.018, + "args": { + "External id": 467338,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453973333.126, "dur": 18.602, + "args": { + "External id": 467339,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453973362.510, "dur": 5.243, + "args": { + "External id": 467340,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453973366.143, "dur": 0.420, + "args": { + "External id": 467341,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453973373.208, "dur": 63.609, + "args": { + "External id": 467342,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453973374.537, "dur": 4.413, + "args": { + "External id": 467343,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453973375.525, "dur": 2.708, + "args": { + "External id": 467344,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453973377.188, "dur": 0.872, + "args": { + "External id": 467345,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453973385.326, "dur": 50.982, + "args": { + "External id": 467346,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453973386.020, "dur": 49.626, + "args": { + "External id": 467347,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453973442.116, "dur": 4.250, + "args": { + "External id": 467348,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453973444.305, "dur": 0.535, + "args": { + "External id": 467349,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453973454.406, "dur": 1.829, + "args": { + "External id": 467350,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453973466.245, "dur": 13.582, + "args": { + "External id": 467351,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453973471.852, "dur": 7.611, + "args": { + "External id": 467352,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453973618.939, "dur": 229.768, + "args": { + "External id": 467353,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453973623.353, "dur": 3.535, + "args": { + "External id": 467354,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453973628.814, "dur": 219.376, + "args": { + "External id": 467355,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453973630.938, "dur": 0.543, + "args": { + "External id": 467356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453973633.119, "dur": 31.304, + "args": { + "External id": 467357,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453973666.352, "dur": 4.974, + "args": { + "External id": 467358,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453973669.906, "dur": 1.085, + "args": { + "External id": 467359,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453973674.837, "dur": 29.062, + "args": { + "External id": 467360,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453973676.413, "dur": 1.486, + "args": { + "External id": 467361,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453973679.606, "dur": 23.865, + "args": { + "External id": 467362,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453973685.567, "dur": 3.550, + "args": { + "External id": 467363,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453973705.967, "dur": 29.048, + "args": { + "External id": 467364,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453973737.522, "dur": 15.679, + "args": { + "External id": 467365,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453973756.513, "dur": 16.542, + "args": { + "External id": 467366,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453973775.080, "dur": 14.285, + "args": { + "External id": 467367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453973791.606, "dur": 24.507, + "args": { + "External id": 467368,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453973794.164, "dur": 1.734, + "args": { + "External id": 467369,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453973799.971, "dur": 0.945, + "args": { + "External id": 467370,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453973817.506, "dur": 14.193, + "args": { + "External id": 467371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453973833.293, "dur": 13.569, + "args": { + "External id": 467372,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453973857.863, "dur": 2.247, + "args": { + "External id": 467373,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453973871.263, "dur": 5.217, + "args": { + "External id": 467374,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453973875.147, "dur": 0.374, + "args": { + "External id": 467375,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453973962.090, "dur": 75.669, + "args": { + "External id": 467376,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453974044.313, "dur": 49.892, + "args": { + "External id": 467377,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453974050.529, "dur": 0.990, + "args": { + "External id": 467378,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453974097.076, "dur": 36.541, + "args": { + "External id": 467379,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453974140.449, "dur": 8.562, + "args": { + "External id": 467380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453974142.577, "dur": 5.527, + "args": { + "External id": 467381,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453974146.450, "dur": 1.384, + "args": { + "External id": 467382,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453974152.897, "dur": 63.462, + "args": { + "External id": 467383,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453974157.171, "dur": 58.499, + "args": { + "External id": 467384,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453974221.111, "dur": 18.765, + "args": { + "External id": 467385,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453974247.411, "dur": 4.502, + "args": { + "External id": 467386,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453974250.204, "dur": 0.717, + "args": { + "External id": 467387,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453974257.192, "dur": 58.452, + "args": { + "External id": 467388,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453974258.201, "dur": 9.057, + "args": { + "External id": 467389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453974259.107, "dur": 7.352, + "args": { + "External id": 467390,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453974263.301, "dur": 2.919, + "args": { + "External id": 467391,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453974267.917, "dur": 47.217, + "args": { + "External id": 467392,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453974268.674, "dur": 45.898, + "args": { + "External id": 467393,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453974320.774, "dur": 4.412, + "args": { + "External id": 467394,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453974323.227, "dur": 0.507, + "args": { + "External id": 467395,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453974332.673, "dur": 1.545, + "args": { + "External id": 467396,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453974344.118, "dur": 10.443, + "args": { + "External id": 467397,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453974348.765, "dur": 5.423, + "args": { + "External id": 467398,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453974481.030, "dur": 239.664, + "args": { + "External id": 467399,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453974484.865, "dur": 2.010, + "args": { + "External id": 467400,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453974488.631, "dur": 231.553, + "args": { + "External id": 467401,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453974489.967, "dur": 0.411, + "args": { + "External id": 467402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453974494.294, "dur": 26.381, + "args": { + "External id": 467403,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453974522.612, "dur": 3.976, + "args": { + "External id": 467404,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453974525.596, "dur": 0.759, + "args": { + "External id": 467405,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453974529.928, "dur": 26.747, + "args": { + "External id": 467406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453974531.262, "dur": 3.358, + "args": { + "External id": 467407,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453974536.404, "dur": 19.937, + "args": { + "External id": 467408,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453974539.937, "dur": 3.833, + "args": { + "External id": 467409,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453974558.270, "dur": 22.334, + "args": { + "External id": 467410,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453974582.551, "dur": 35.020, + "args": { + "External id": 467411,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453974621.875, "dur": 17.118, + "args": { + "External id": 467412,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453974641.014, "dur": 15.415, + "args": { + "External id": 467413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453974659.097, "dur": 29.145, + "args": { + "External id": 467414,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453974664.159, "dur": 2.032, + "args": { + "External id": 467415,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453974669.096, "dur": 0.831, + "args": { + "External id": 467416,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453974689.994, "dur": 15.433, + "args": { + "External id": 467417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453974706.697, "dur": 12.213, + "args": { + "External id": 467418,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453974729.897, "dur": 2.402, + "args": { + "External id": 467419,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453974743.289, "dur": 5.962, + "args": { + "External id": 467420,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453974746.495, "dur": 0.574, + "args": { + "External id": 467421,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453974836.211, "dur": 70.376, + "args": { + "External id": 467422,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453974912.329, "dur": 5.672, + "args": { + "External id": 467423,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453974915.714, "dur": 0.862, + "args": { + "External id": 467424,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453974919.746, "dur": 29.444, + "args": { + "External id": 467425,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453974954.051, "dur": 11.280, + "args": { + "External id": 467426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453974955.934, "dur": 8.562, + "args": { + "External id": 467427,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453974961.083, "dur": 3.053, + "args": { + "External id": 467428,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453974971.476, "dur": 51.099, + "args": { + "External id": 467429,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453974972.872, "dur": 48.854, + "args": { + "External id": 467430,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453975026.990, "dur": 18.534, + "args": { + "External id": 467431,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453975052.299, "dur": 44.679, + "args": { + "External id": 467432,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453975055.096, "dur": 38.148, + "args": { + "External id": 467433,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453975104.451, "dur": 65.379, + "args": { + "External id": 467434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453975107.921, "dur": 5.375, + "args": { + "External id": 467435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453975109.226, "dur": 3.353, + "args": { + "External id": 467436,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453975111.471, "dur": 0.706, + "args": { + "External id": 467437,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453975114.010, "dur": 55.280, + "args": { + "External id": 467438,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453975114.978, "dur": 53.599, + "args": { + "External id": 467439,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453975174.771, "dur": 4.553, + "args": { + "External id": 467440,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453975177.163, "dur": 0.787, + "args": { + "External id": 467441,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453975188.748, "dur": 2.469, + "args": { + "External id": 467442,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453975201.586, "dur": 10.333, + "args": { + "External id": 467443,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453975203.866, "dur": 7.672, + "args": { + "External id": 467444,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453975318.825, "dur": 256.006, + "args": { + "External id": 467445,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453975321.564, "dur": 2.142, + "args": { + "External id": 467446,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453975327.887, "dur": 246.307, + "args": { + "External id": 467447,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453975329.736, "dur": 0.380, + "args": { + "External id": 467448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453975331.942, "dur": 27.911, + "args": { + "External id": 467449,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453975361.611, "dur": 3.565, + "args": { + "External id": 467450,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453975364.291, "dur": 0.641, + "args": { + "External id": 467451,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453975366.296, "dur": 27.047, + "args": { + "External id": 467452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453975367.370, "dur": 1.236, + "args": { + "External id": 467453,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453975370.271, "dur": 22.720, + "args": { + "External id": 467454,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453975376.253, "dur": 2.456, + "args": { + "External id": 467455,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453975395.193, "dur": 24.127, + "args": { + "External id": 467456,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453975456.601, "dur": 16.476, + "args": { + "External id": 467457,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453975478.983, "dur": 17.370, + "args": { + "External id": 467458,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453975497.941, "dur": 14.175, + "args": { + "External id": 467459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453975516.406, "dur": 23.100, + "args": { + "External id": 467460,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453975519.096, "dur": 1.742, + "args": { + "External id": 467461,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453975523.284, "dur": 0.483, + "args": { + "External id": 467462,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453975540.820, "dur": 14.551, + "args": { + "External id": 467463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453975559.164, "dur": 13.834, + "args": { + "External id": 467464,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453975582.726, "dur": 1.984, + "args": { + "External id": 467465,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453975613.260, "dur": 6.511, + "args": { + "External id": 467466,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453975617.659, "dur": 0.758, + "args": { + "External id": 467467,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453975706.211, "dur": 70.945, + "args": { + "External id": 467468,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453975783.725, "dur": 7.066, + "args": { + "External id": 467469,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453975787.304, "dur": 2.224, + "args": { + "External id": 467470,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453975792.627, "dur": 32.448, + "args": { + "External id": 467471,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453975830.614, "dur": 8.715, + "args": { + "External id": 467472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453975835.181, "dur": 3.344, + "args": { + "External id": 467473,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453975837.420, "dur": 0.858, + "args": { + "External id": 467474,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453975842.761, "dur": 47.932, + "args": { + "External id": 467475,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453975844.034, "dur": 45.947, + "args": { + "External id": 467476,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453975894.835, "dur": 18.766, + "args": { + "External id": 467477,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453975919.875, "dur": 6.637, + "args": { + "External id": 467478,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453975925.018, "dur": 0.507, + "args": { + "External id": 467479,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453975931.087, "dur": 58.174, + "args": { + "External id": 467480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453975932.041, "dur": 4.669, + "args": { + "External id": 467481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453975933.504, "dur": 2.548, + "args": { + "External id": 467482,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453975935.277, "dur": 0.648, + "args": { + "External id": 467483,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453975937.511, "dur": 51.235, + "args": { + "External id": 467484,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453975938.112, "dur": 49.929, + "args": { + "External id": 467485,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453975994.392, "dur": 4.295, + "args": { + "External id": 467486,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453975996.739, "dur": 0.561, + "args": { + "External id": 467487,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453976007.575, "dur": 2.117, + "args": { + "External id": 467488,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453976018.919, "dur": 7.380, + "args": { + "External id": 467489,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453976021.206, "dur": 4.686, + "args": { + "External id": 467490,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453976184.523, "dur": 227.889, + "args": { + "External id": 467491,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453976187.609, "dur": 5.137, + "args": { + "External id": 467492,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453976197.329, "dur": 214.363, + "args": { + "External id": 467493,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453976199.184, "dur": 0.413, + "args": { + "External id": 467494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453976201.152, "dur": 28.346, + "args": { + "External id": 467495,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453976231.803, "dur": 5.777, + "args": { + "External id": 467496,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453976236.240, "dur": 0.983, + "args": { + "External id": 467497,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453976238.815, "dur": 26.087, + "args": { + "External id": 467498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453976239.853, "dur": 1.305, + "args": { + "External id": 467499,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453976242.666, "dur": 21.912, + "args": { + "External id": 467500,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453976246.210, "dur": 3.821, + "args": { + "External id": 467501,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453976269.186, "dur": 25.759, + "args": { + "External id": 467502,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453976296.673, "dur": 17.392, + "args": { + "External id": 467503,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453976317.174, "dur": 16.553, + "args": { + "External id": 467504,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453976335.343, "dur": 14.858, + "args": { + "External id": 467505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453976352.286, "dur": 25.773, + "args": { + "External id": 467506,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453976357.017, "dur": 1.359, + "args": { + "External id": 467507,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453976361.058, "dur": 0.976, + "args": { + "External id": 467508,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453976382.010, "dur": 14.810, + "args": { + "External id": 467509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453976398.110, "dur": 12.347, + "args": { + "External id": 467510,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453976421.694, "dur": 2.638, + "args": { + "External id": 467511,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453976435.287, "dur": 4.552, + "args": { + "External id": 467512,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453976438.076, "dur": 0.751, + "args": { + "External id": 467513,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453976521.060, "dur": 69.191, + "args": { + "External id": 467514,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453976614.108, "dur": 7.889, + "args": { + "External id": 467515,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453976618.574, "dur": 1.174, + "args": { + "External id": 467516,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453976626.330, "dur": 33.991, + "args": { + "External id": 467517,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453976666.181, "dur": 14.148, + "args": { + "External id": 467518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453976673.592, "dur": 5.823, + "args": { + "External id": 467519,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453976676.495, "dur": 2.654, + "args": { + "External id": 467520,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453976684.161, "dur": 56.618, + "args": { + "External id": 467521,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453976685.505, "dur": 54.329, + "args": { + "External id": 467522,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453976748.151, "dur": 19.626, + "args": { + "External id": 467523,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453976775.763, "dur": 4.363, + "args": { + "External id": 467524,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453976778.496, "dur": 0.641, + "args": { + "External id": 467525,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453976785.452, "dur": 54.108, + "args": { + "External id": 467526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453976786.483, "dur": 6.607, + "args": { + "External id": 467527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453976787.343, "dur": 4.991, + "args": { + "External id": 467528,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453976791.669, "dur": 0.470, + "args": { + "External id": 467529,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453976793.947, "dur": 45.112, + "args": { + "External id": 467530,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453976794.820, "dur": 43.588, + "args": { + "External id": 467531,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453976844.084, "dur": 4.265, + "args": { + "External id": 467532,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453976846.555, "dur": 0.507, + "args": { + "External id": 467533,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453976855.211, "dur": 1.762, + "args": { + "External id": 467534,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453976869.267, "dur": 8.781, + "args": { + "External id": 467535,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453976871.436, "dur": 6.272, + "args": { + "External id": 467536,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453976980.736, "dur": 248.353, + "args": { + "External id": 467537,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453976983.439, "dur": 2.075, + "args": { + "External id": 467538,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453976989.506, "dur": 238.990, + "args": { + "External id": 467539,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453976990.945, "dur": 0.401, + "args": { + "External id": 467540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453976992.646, "dur": 23.817, + "args": { + "External id": 467541,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453977020.666, "dur": 5.296, + "args": { + "External id": 467542,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453977023.207, "dur": 2.476, + "args": { + "External id": 467543,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453977027.056, "dur": 22.898, + "args": { + "External id": 467544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453977028.032, "dur": 1.304, + "args": { + "External id": 467545,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453977030.989, "dur": 18.662, + "args": { + "External id": 467546,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977033.989, "dur": 3.133, + "args": { + "External id": 467547,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453977051.844, "dur": 67.886, + "args": { + "External id": 467548,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977122.863, "dur": 15.042, + "args": { + "External id": 467549,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453977141.162, "dur": 14.514, + "args": { + "External id": 467550,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977157.436, "dur": 12.384, + "args": { + "External id": 467551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453977172.225, "dur": 27.721, + "args": { + "External id": 467552,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977176.888, "dur": 1.868, + "args": { + "External id": 467553,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453977181.878, "dur": 2.699, + "args": { + "External id": 467554,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977201.451, "dur": 12.921, + "args": { + "External id": 467555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977215.754, "dur": 11.416, + "args": { + "External id": 467556,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453977239.212, "dur": 2.639, + "args": { + "External id": 467557,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453977252.456, "dur": 4.830, + "args": { + "External id": 467558,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453977255.668, "dur": 0.539, + "args": { + "External id": 467559,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453977340.309, "dur": 64.478, + "args": { + "External id": 467560,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453977413.818, "dur": 5.663, + "args": { + "External id": 467561,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453977417.396, "dur": 0.692, + "args": { + "External id": 467562,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977421.206, "dur": 27.380, + "args": { + "External id": 467563,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453977454.203, "dur": 7.032, + "args": { + "External id": 467564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453977456.042, "dur": 4.275, + "args": { + "External id": 467565,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453977458.602, "dur": 1.448, + "args": { + "External id": 467566,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453977466.482, "dur": 48.155, + "args": { + "External id": 467567,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453977467.897, "dur": 45.900, + "args": { + "External id": 467568,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977519.361, "dur": 15.536, + "args": { + "External id": 467569,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453977541.895, "dur": 3.985, + "args": { + "External id": 467570,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453977544.214, "dur": 0.576, + "args": { + "External id": 467571,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453977550.275, "dur": 70.318, + "args": { + "External id": 467572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453977551.212, "dur": 6.635, + "args": { + "External id": 467573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453977554.808, "dur": 2.370, + "args": { + "External id": 467574,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453977556.568, "dur": 0.474, + "args": { + "External id": 467575,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453977558.399, "dur": 61.611, + "args": { + "External id": 467576,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453977559.263, "dur": 59.561, + "args": { + "External id": 467577,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453977627.697, "dur": 7.187, + "args": { + "External id": 467578,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453977630.460, "dur": 2.852, + "args": { + "External id": 467579,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453977642.266, "dur": 2.060, + "args": { + "External id": 467580,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453977655.888, "dur": 7.407, + "args": { + "External id": 467581,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453977658.132, "dur": 4.790, + "args": { + "External id": 467582,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453977764.803, "dur": 212.893, + "args": { + "External id": 467583,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453977769.017, "dur": 2.169, + "args": { + "External id": 467584,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453977773.037, "dur": 204.228, + "args": { + "External id": 467585,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453977774.497, "dur": 0.579, + "args": { + "External id": 467586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453977776.399, "dur": 25.575, + "args": { + "External id": 467587,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453977806.522, "dur": 4.976, + "args": { + "External id": 467588,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453977810.424, "dur": 0.815, + "args": { + "External id": 467589,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453977812.731, "dur": 25.167, + "args": { + "External id": 467590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453977813.663, "dur": 1.186, + "args": { + "External id": 467591,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453977816.332, "dur": 21.151, + "args": { + "External id": 467592,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977819.358, "dur": 3.187, + "args": { + "External id": 467593,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453977839.278, "dur": 23.936, + "args": { + "External id": 467594,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977864.902, "dur": 17.922, + "args": { + "External id": 467595,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453977885.858, "dur": 15.434, + "args": { + "External id": 467596,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977906.275, "dur": 14.173, + "args": { + "External id": 467597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453977924.862, "dur": 22.697, + "args": { + "External id": 467598,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977927.209, "dur": 1.408, + "args": { + "External id": 467599,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453977930.531, "dur": 0.799, + "args": { + "External id": 467600,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977948.992, "dur": 14.326, + "args": { + "External id": 467601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453977964.451, "dur": 11.648, + "args": { + "External id": 467602,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453977985.111, "dur": 2.058, + "args": { + "External id": 467603,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453977998.524, "dur": 4.354, + "args": { + "External id": 467604,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453978001.419, "dur": 0.465, + "args": { + "External id": 467605,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453978121.996, "dur": 75.202, + "args": { + "External id": 467606,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453978204.039, "dur": 7.119, + "args": { + "External id": 467607,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453978208.381, "dur": 1.063, + "args": { + "External id": 467608,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453978212.745, "dur": 30.914, + "args": { + "External id": 467609,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453978249.259, "dur": 10.575, + "args": { + "External id": 467610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453978251.617, "dur": 7.480, + "args": { + "External id": 467611,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453978256.305, "dur": 2.495, + "args": { + "External id": 467612,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453978263.095, "dur": 50.659, + "args": { + "External id": 467613,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453978264.519, "dur": 48.335, + "args": { + "External id": 467614,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453978318.230, "dur": 18.551, + "args": { + "External id": 467615,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453978343.475, "dur": 4.172, + "args": { + "External id": 467616,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453978346.179, "dur": 0.539, + "args": { + "External id": 467617,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338707, "tid": 2338707, + "ts": 6267453978354.623, "dur": 47.799, + "args": { + "External id": 467618,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453978355.392, "dur": 3.675, + "args": { + "External id": 467619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453978356.372, "dur": 2.151, + "args": { + "External id": 467620,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453978357.849, "dur": 0.488, + "args": { + "External id": 467621,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453978359.617, "dur": 42.345, + "args": { + "External id": 467622,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453978360.220, "dur": 40.996, + "args": { + "External id": 467623,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453978409.166, "dur": 4.430, + "args": { + "External id": 467624,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453978411.634, "dur": 0.613, + "args": { + "External id": 467625,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453978420.111, "dur": 1.768, + "args": { + "External id": 467626,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453978430.686, "dur": 7.400, + "args": { + "External id": 467627,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453978432.697, "dur": 5.032, + "args": { + "External id": 467628,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453978534.673, "dur": 219.998, + "args": { + "External id": 467629,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453978536.697, "dur": 3.943, + "args": { + "External id": 467630,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338707, "tid": 2338707, + "ts": 6267453978545.011, "dur": 209.074, + "args": { + "External id": 467631,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338707, "tid": 2338707, + "ts": 6267453978546.474, "dur": 0.580, + "args": { + "External id": 467632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338707, "tid": 2338707, + "ts": 6267453978548.190, "dur": 22.350, + "args": { + "External id": 467633,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338707, "tid": 2338707, + "ts": 6267453978572.642, "dur": 4.829, + "args": { + "External id": 467634,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453978574.673, "dur": 2.522, + "args": { + "External id": 467635,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453978578.512, "dur": 45.545, + "args": { + "External id": 467636,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267453978579.946, "dur": 1.127, + "args": { + "External id": 467637,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267453978582.451, "dur": 40.918, + "args": { + "External id": 467638,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453978587.618, "dur": 3.156, + "args": { + "External id": 467639,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267453978626.359, "dur": 24.121, + "args": { + "External id": 467640,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453978652.213, "dur": 14.210, + "args": { + "External id": 467641,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338707, "tid": 2338707, + "ts": 6267453978669.222, "dur": 14.017, + "args": { + "External id": 467642,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338707, "tid": 2338707, + "ts": 6267453978684.882, "dur": 13.462, + "args": { + "External id": 467643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453978700.349, "dur": 23.437, + "args": { + "External id": 467644,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267453978702.945, "dur": 1.655, + "args": { + "External id": 467645,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453978706.707, "dur": 2.932, + "args": { + "External id": 467646,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338707, "tid": 2338707, + "ts": 6267453978727.635, "dur": 12.751, + "args": { + "External id": 467647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453978741.570, "dur": 11.329, + "args": { + "External id": 467648,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267453978764.002, "dur": 2.464, + "args": { + "External id": 467649,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453978776.160, "dur": 4.048, + "args": { + "External id": 467650,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453978778.533, "dur": 0.635, + "args": { + "External id": 467651,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453978854.540, "dur": 65.041, + "args": { + "External id": 467652,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338707, "tid": 2338707, + "ts": 6267453978924.851, "dur": 6.146, + "args": { + "External id": 467653,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453978929.013, "dur": 0.734, + "args": { + "External id": 467654,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453978932.513, "dur": 28.434, + "args": { + "External id": 467655,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338707, "tid": 2338707, + "ts": 6267453978968.780, "dur": 5.974, + "args": { + "External id": 467656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338707, "tid": 2338707, + "ts": 6267453978970.389, "dur": 3.510, + "args": { + "External id": 467657,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453978972.397, "dur": 1.260, + "args": { + "External id": 467658,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338707, "tid": 2338707, + "ts": 6267453978977.409, "dur": 46.213, + "args": { + "External id": 467659,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338707, "tid": 2338707, + "ts": 6267453978978.401, "dur": 44.459, + "args": { + "External id": 467660,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453979027.681, "dur": 14.849, + "args": { + "External id": 467661,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453979047.221, "dur": 75.854, + "args": { + "External id": 467662,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338707, "tid": 2338707, + "ts": 6267453979052.645, "dur": 69.844, + "args": { + "External id": 467663,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453979099.083, "dur": 1.018, + "args": { + "External id": 467664,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267453979130.028, "dur": 29.851, + "args": { + "External id": 467665,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338707, "tid": 2338707, + "ts": 6267453979132.061, "dur": 27.585, + "args": { + "External id": 467666,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 20758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453979138.058, "dur": 4.419, + "args": { + "External id": 467667,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267453979143.644, "dur": 15.547, + "args": { + "External id": 467668,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453979173.823, "dur": 6.963, + "args": { + "External id": 467669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453979177.276, "dur": 3.225, + "args": { + "External id": 467670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453979182.049, "dur": 3.355, + "args": { + "External id": 467671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338707, "tid": 2338707, + "ts": 6267453979184.772, "dur": 0.550, + "args": { + "External id": 467672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453979237.027, "dur": 28.530, + "args": { + "External id": 467673,"Sequence number": 5036819, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338707, "tid": 2338707, + "ts": 6267453979267.702, "dur": 14.927, + "args": { + "External id": 467674,"Sequence number": 5036820, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20766 + } + }, + { + "ph": "s", "id": 226, "pid": 2338707, "tid": 2338707, "ts": 6267453979267.702, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2338707, "tid": 2338707, + "ts": 6267453979414.636, "dur": 48.712, + "args": { + "External id": 467675,"Record function id": 0, "Ev Idx": 20767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338707, "tid": 2338707, + "ts": 6267453979587.843, "dur": 62.910, + "args": { + "External id": 467676,"Sequence number": 5036821, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20768 + } + }, + { + "ph": "s", "id": 225, "pid": 2338707, "tid": 2338707, "ts": 6267453979587.843, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453979693.622, "dur": 31.871, + "args": { + "External id": 467677,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 20769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267453979695.529, "dur": 11.074, + "args": { + "External id": 467678,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 20770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267453979700.766, "dur": 5.014, + "args": { + "External id": 467679,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267453979708.404, "dur": 16.738, + "args": { + "External id": 467680,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338707, "tid": 2338707, + "ts": 6267456085445.075, "dur": 95.919, + "args": { + "External id": 467681,"Sequence number": 5036822, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338707, "tid": 2338707, + "ts": 6267456085552.343, "dur": 28.833, + "args": { + "External id": 467682,"Sequence number": 5036823, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267456085611.060, "dur": 46.614, + "args": { + "External id": 467683,"Sequence number": 5036824, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267456085662.579, "dur": 148.635, + "args": { + "External id": 467684,"Sequence number": 5036825, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[1], [1], []], "Ev Idx": 20776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267456087379.586, "dur": 119.188, + "args": { + "External id": 467685,"Sequence number": 5036826, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267456087506.706, "dur": 25.659, + "args": { + "External id": 467686,"Sequence number": 5036827, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267456087551.586, "dur": 18.603, + "args": { + "External id": 467687,"Sequence number": 5036828, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267456087573.499, "dur": 15.785, + "args": { + "External id": 467688,"Sequence number": 5036829, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[1], [1], []], "Ev Idx": 20780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2338707, "tid": 2338707, + "ts": 6267456092094.749, "dur": 3767.787, + "args": { + "External id": 467689,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2338707, "tid": 2338707, + "ts": 6267456092804.954, "dur": 1565.240, + "args": { + "External id": 467690,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338707, "tid": 2338707, + "ts": 6267456092828.223, "dur": 92.377, + "args": { + "External id": 467691,"Record function id": 0, "Concrete Inputs": ["[68250]", "6", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 20783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267456092833.757, "dur": 18.246, + "args": { + "External id": 467692,"Record function id": 0, "Concrete Inputs": ["[68250]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338707, "tid": 2338707, + "ts": 6267456092856.684, "dur": 63.492, + "args": { + "External id": 467693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[68250]], "Ev Idx": 20785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338707, "tid": 2338707, + "ts": 6267456092865.350, "dur": 53.930, + "args": { + "External id": 467694,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[68250], []], "Ev Idx": 20786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095897.596, "dur": 3.642, + "args": { + "External id": 467695,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095903.399, "dur": 0.586, + "args": { + "External id": 467696,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095905.523, "dur": 0.252, + "args": { + "External id": 467697,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095907.029, "dur": 0.456, + "args": { + "External id": 467698,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095908.843, "dur": 0.449, + "args": { + "External id": 467699,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095910.790, "dur": 0.513, + "args": { + "External id": 467700,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095912.276, "dur": 0.426, + "args": { + "External id": 467701,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095915.835, "dur": 0.432, + "args": { + "External id": 467702,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095917.022, "dur": 0.445, + "args": { + "External id": 467703,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095918.749, "dur": 0.445, + "args": { + "External id": 467704,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095920.088, "dur": 0.440, + "args": { + "External id": 467705,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095921.843, "dur": 0.213, + "args": { + "External id": 467706,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095922.943, "dur": 0.413, + "args": { + "External id": 467707,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095924.619, "dur": 0.210, + "args": { + "External id": 467708,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095925.829, "dur": 0.202, + "args": { + "External id": 467709,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095929.730, "dur": 0.205, + "args": { + "External id": 467710,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095930.799, "dur": 0.211, + "args": { + "External id": 467711,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095931.826, "dur": 0.230, + "args": { + "External id": 467712,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095932.978, "dur": 0.208, + "args": { + "External id": 467713,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095934.254, "dur": 0.422, + "args": { + "External id": 467714,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095935.545, "dur": 0.446, + "args": { + "External id": 467715,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095936.792, "dur": 0.445, + "args": { + "External id": 467716,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095938.016, "dur": 0.434, + "args": { + "External id": 467717,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095941.514, "dur": 0.316, + "args": { + "External id": 467718,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095942.582, "dur": 0.218, + "args": { + "External id": 467719,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095944.012, "dur": 0.446, + "args": { + "External id": 467720,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095945.230, "dur": 0.432, + "args": { + "External id": 467721,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095946.610, "dur": 0.297, + "args": { + "External id": 467722,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095947.967, "dur": 0.309, + "args": { + "External id": 467723,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095949.148, "dur": 0.336, + "args": { + "External id": 467724,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095950.280, "dur": 0.447, + "args": { + "External id": 467725,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095954.174, "dur": 0.432, + "args": { + "External id": 467726,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095955.425, "dur": 0.197, + "args": { + "External id": 467727,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095956.615, "dur": 0.215, + "args": { + "External id": 467728,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095957.641, "dur": 0.197, + "args": { + "External id": 467729,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095958.825, "dur": 0.222, + "args": { + "External id": 467730,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095960.022, "dur": 0.219, + "args": { + "External id": 467731,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095961.274, "dur": 0.221, + "args": { + "External id": 467732,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095962.500, "dur": 0.227, + "args": { + "External id": 467733,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095966.546, "dur": 0.219, + "args": { + "External id": 467734,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095967.694, "dur": 0.344, + "args": { + "External id": 467735,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095969.320, "dur": 0.443, + "args": { + "External id": 467736,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095971.893, "dur": 0.212, + "args": { + "External id": 467737,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095973.025, "dur": 0.483, + "args": { + "External id": 467738,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095974.423, "dur": 0.434, + "args": { + "External id": 467739,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095975.775, "dur": 0.204, + "args": { + "External id": 467740,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095976.697, "dur": 0.397, + "args": { + "External id": 467741,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095981.041, "dur": 0.426, + "args": { + "External id": 467742,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095982.261, "dur": 0.195, + "args": { + "External id": 467743,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095983.431, "dur": 0.195, + "args": { + "External id": 467744,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095984.737, "dur": 0.226, + "args": { + "External id": 467745,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095985.754, "dur": 0.197, + "args": { + "External id": 467746,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095986.951, "dur": 0.202, + "args": { + "External id": 467747,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095988.205, "dur": 0.206, + "args": { + "External id": 467748,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095989.214, "dur": 0.200, + "args": { + "External id": 467749,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095992.564, "dur": 0.225, + "args": { + "External id": 467750,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095993.576, "dur": 0.223, + "args": { + "External id": 467751,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095994.685, "dur": 0.213, + "args": { + "External id": 467752,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095995.830, "dur": 0.191, + "args": { + "External id": 467753,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095997.011, "dur": 0.226, + "args": { + "External id": 467754,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095998.114, "dur": 0.382, + "args": { + "External id": 467755,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456095999.250, "dur": 0.201, + "args": { + "External id": 467756,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096000.201, "dur": 0.212, + "args": { + "External id": 467757,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096003.629, "dur": 0.210, + "args": { + "External id": 467758,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096004.602, "dur": 0.455, + "args": { + "External id": 467759,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096006.110, "dur": 0.623, + "args": { + "External id": 467760,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096007.461, "dur": 0.190, + "args": { + "External id": 467761,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096008.368, "dur": 0.417, + "args": { + "External id": 467762,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096009.523, "dur": 0.385, + "args": { + "External id": 467763,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096010.795, "dur": 0.423, + "args": { + "External id": 467764,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096012.131, "dur": 0.403, + "args": { + "External id": 467765,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096015.529, "dur": 0.386, + "args": { + "External id": 467766,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096016.770, "dur": 0.223, + "args": { + "External id": 467767,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096018.077, "dur": 0.213, + "args": { + "External id": 467768,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096019.125, "dur": 0.414, + "args": { + "External id": 467769,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096020.364, "dur": 0.413, + "args": { + "External id": 467770,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096021.530, "dur": 0.213, + "args": { + "External id": 467771,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096023.170, "dur": 0.453, + "args": { + "External id": 467772,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096024.363, "dur": 0.692, + "args": { + "External id": 467773,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096028.310, "dur": 0.553, + "args": { + "External id": 467774,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096029.702, "dur": 0.426, + "args": { + "External id": 467775,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096031.248, "dur": 0.422, + "args": { + "External id": 467776,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096032.709, "dur": 0.314, + "args": { + "External id": 467777,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096033.881, "dur": 0.337, + "args": { + "External id": 467778,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096035.016, "dur": 0.338, + "args": { + "External id": 467779,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096036.522, "dur": 0.308, + "args": { + "External id": 467780,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096037.729, "dur": 0.194, + "args": { + "External id": 467781,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096041.099, "dur": 0.206, + "args": { + "External id": 467782,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096042.066, "dur": 0.323, + "args": { + "External id": 467783,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096043.327, "dur": 0.455, + "args": { + "External id": 467784,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096044.566, "dur": 0.199, + "args": { + "External id": 467785,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096045.507, "dur": 0.200, + "args": { + "External id": 467786,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096046.583, "dur": 0.210, + "args": { + "External id": 467787,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096048.330, "dur": 0.210, + "args": { + "External id": 467788,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096049.277, "dur": 0.223, + "args": { + "External id": 467789,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096085.821, "dur": 1.124, + "args": { + "External id": 467790,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096090.498, "dur": 0.483, + "args": { + "External id": 467791,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096092.031, "dur": 0.445, + "args": { + "External id": 467792,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096093.514, "dur": 0.400, + "args": { + "External id": 467793,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096094.703, "dur": 0.362, + "args": { + "External id": 467794,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096096.030, "dur": 0.355, + "args": { + "External id": 467795,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096097.309, "dur": 0.359, + "args": { + "External id": 467796,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096098.567, "dur": 0.360, + "args": { + "External id": 467797,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096101.961, "dur": 0.366, + "args": { + "External id": 467798,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096103.237, "dur": 0.246, + "args": { + "External id": 467799,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096104.393, "dur": 0.310, + "args": { + "External id": 467800,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096105.423, "dur": 0.408, + "args": { + "External id": 467801,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096106.687, "dur": 0.318, + "args": { + "External id": 467802,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096107.877, "dur": 0.213, + "args": { + "External id": 467803,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096109.562, "dur": 0.223, + "args": { + "External id": 467804,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096110.787, "dur": 0.203, + "args": { + "External id": 467805,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096114.811, "dur": 0.237, + "args": { + "External id": 467806,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096115.902, "dur": 0.226, + "args": { + "External id": 467807,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096116.908, "dur": 0.232, + "args": { + "External id": 467808,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096117.954, "dur": 0.198, + "args": { + "External id": 467809,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096118.932, "dur": 0.204, + "args": { + "External id": 467810,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096119.852, "dur": 0.199, + "args": { + "External id": 467811,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096120.759, "dur": 0.204, + "args": { + "External id": 467812,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096121.723, "dur": 0.228, + "args": { + "External id": 467813,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096125.177, "dur": 0.205, + "args": { + "External id": 467814,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096126.203, "dur": 0.205, + "args": { + "External id": 467815,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096127.157, "dur": 0.201, + "args": { + "External id": 467816,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096128.130, "dur": 0.231, + "args": { + "External id": 467817,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096129.214, "dur": 0.207, + "args": { + "External id": 467818,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096130.357, "dur": 0.205, + "args": { + "External id": 467819,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096131.578, "dur": 0.219, + "args": { + "External id": 467820,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096132.670, "dur": 0.347, + "args": { + "External id": 467821,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096135.563, "dur": 0.428, + "args": { + "External id": 467822,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096136.817, "dur": 0.422, + "args": { + "External id": 467823,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096138.642, "dur": 0.567, + "args": { + "External id": 467824,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096140.447, "dur": 0.324, + "args": { + "External id": 467825,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096141.636, "dur": 0.308, + "args": { + "External id": 467826,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096142.688, "dur": 0.349, + "args": { + "External id": 467827,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096143.990, "dur": 0.374, + "args": { + "External id": 467828,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096145.097, "dur": 0.343, + "args": { + "External id": 467829,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096148.534, "dur": 0.324, + "args": { + "External id": 467830,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096149.594, "dur": 0.196, + "args": { + "External id": 467831,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096150.524, "dur": 0.200, + "args": { + "External id": 467832,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096151.577, "dur": 0.204, + "args": { + "External id": 467833,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096152.572, "dur": 0.206, + "args": { + "External id": 467834,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096153.530, "dur": 0.233, + "args": { + "External id": 467835,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096154.478, "dur": 0.199, + "args": { + "External id": 467836,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096155.472, "dur": 0.211, + "args": { + "External id": 467837,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096158.746, "dur": 0.216, + "args": { + "External id": 467838,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096159.695, "dur": 0.218, + "args": { + "External id": 467839,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096160.697, "dur": 0.210, + "args": { + "External id": 467840,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096161.718, "dur": 0.204, + "args": { + "External id": 467841,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096162.665, "dur": 0.212, + "args": { + "External id": 467842,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096163.604, "dur": 0.203, + "args": { + "External id": 467843,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096165.112, "dur": 0.225, + "args": { + "External id": 467844,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096166.132, "dur": 0.371, + "args": { + "External id": 467845,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096170.105, "dur": 0.680, + "args": { + "External id": 467846,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096171.589, "dur": 0.413, + "args": { + "External id": 467847,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096172.974, "dur": 0.320, + "args": { + "External id": 467848,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096174.169, "dur": 0.324, + "args": { + "External id": 467849,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096175.357, "dur": 0.287, + "args": { + "External id": 467850,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096178.487, "dur": 0.396, + "args": { + "External id": 467851,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096179.827, "dur": 0.351, + "args": { + "External id": 467852,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096180.941, "dur": 0.516, + "args": { + "External id": 467853,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096185.179, "dur": 0.338, + "args": { + "External id": 467854,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096186.328, "dur": 0.202, + "args": { + "External id": 467855,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096187.327, "dur": 0.330, + "args": { + "External id": 467856,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096188.527, "dur": 0.203, + "args": { + "External id": 467857,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096189.574, "dur": 0.236, + "args": { + "External id": 467858,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096190.685, "dur": 0.359, + "args": { + "External id": 467859,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096191.754, "dur": 0.416, + "args": { + "External id": 467860,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096192.907, "dur": 0.430, + "args": { + "External id": 467861,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096196.833, "dur": 0.350, + "args": { + "External id": 467862,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096198.100, "dur": 0.402, + "args": { + "External id": 467863,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096199.381, "dur": 0.458, + "args": { + "External id": 467864,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096200.663, "dur": 0.204, + "args": { + "External id": 467865,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096201.718, "dur": 0.206, + "args": { + "External id": 467866,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096202.787, "dur": 0.200, + "args": { + "External id": 467867,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096204.128, "dur": 0.209, + "args": { + "External id": 467868,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096205.172, "dur": 0.205, + "args": { + "External id": 467869,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096208.337, "dur": 0.206, + "args": { + "External id": 467870,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096209.343, "dur": 0.204, + "args": { + "External id": 467871,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096210.471, "dur": 0.423, + "args": { + "External id": 467872,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096211.787, "dur": 0.343, + "args": { + "External id": 467873,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096213.010, "dur": 0.237, + "args": { + "External id": 467874,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096214.264, "dur": 0.354, + "args": { + "External id": 467875,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096215.617, "dur": 0.211, + "args": { + "External id": 467876,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096216.584, "dur": 0.353, + "args": { + "External id": 467877,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096220.181, "dur": 0.343, + "args": { + "External id": 467878,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096221.269, "dur": 0.205, + "args": { + "External id": 467879,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096222.255, "dur": 0.298, + "args": { + "External id": 467880,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096223.457, "dur": 0.213, + "args": { + "External id": 467881,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096224.364, "dur": 0.227, + "args": { + "External id": 467882,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096225.408, "dur": 0.202, + "args": { + "External id": 467883,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096226.523, "dur": 0.221, + "args": { + "External id": 467884,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096232.456, "dur": 0.215, + "args": { + "External id": 467885,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096235.524, "dur": 0.214, + "args": { + "External id": 467886,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096236.541, "dur": 0.209, + "args": { + "External id": 467887,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096237.691, "dur": 0.209, + "args": { + "External id": 467888,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096238.815, "dur": 0.204, + "args": { + "External id": 467889,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096239.770, "dur": 0.200, + "args": { + "External id": 467890,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096240.977, "dur": 0.249, + "args": { + "External id": 467891,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096242.180, "dur": 0.214, + "args": { + "External id": 467892,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096243.327, "dur": 0.224, + "args": { + "External id": 467893,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096246.482, "dur": 0.214, + "args": { + "External id": 467894,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096247.593, "dur": 0.198, + "args": { + "External id": 467895,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096248.948, "dur": 0.213, + "args": { + "External id": 467896,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096250.090, "dur": 0.204, + "args": { + "External id": 467897,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096251.004, "dur": 0.392, + "args": { + "External id": 467898,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096252.135, "dur": 0.350, + "args": { + "External id": 467899,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096253.298, "dur": 0.360, + "args": { + "External id": 467900,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096254.384, "dur": 0.422, + "args": { + "External id": 467901,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096257.917, "dur": 0.433, + "args": { + "External id": 467902,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096259.111, "dur": 0.208, + "args": { + "External id": 467903,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096260.225, "dur": 0.207, + "args": { + "External id": 467904,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096261.223, "dur": 0.204, + "args": { + "External id": 467905,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096262.231, "dur": 0.209, + "args": { + "External id": 467906,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096263.165, "dur": 0.202, + "args": { + "External id": 467907,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096264.122, "dur": 0.228, + "args": { + "External id": 467908,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096265.094, "dur": 0.210, + "args": { + "External id": 467909,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096268.297, "dur": 0.212, + "args": { + "External id": 467910,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096269.277, "dur": 0.203, + "args": { + "External id": 467911,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096270.310, "dur": 0.198, + "args": { + "External id": 467912,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096271.239, "dur": 0.216, + "args": { + "External id": 467913,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096272.291, "dur": 0.202, + "args": { + "External id": 467914,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096273.296, "dur": 0.206, + "args": { + "External id": 467915,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096274.240, "dur": 0.225, + "args": { + "External id": 467916,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096275.178, "dur": 0.204, + "args": { + "External id": 467917,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096277.709, "dur": 0.488, + "args": { + "External id": 467918,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096278.970, "dur": 0.340, + "args": { + "External id": 467919,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096280.030, "dur": 0.420, + "args": { + "External id": 467920,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096281.207, "dur": 0.304, + "args": { + "External id": 467921,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096282.223, "dur": 0.374, + "args": { + "External id": 467922,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096283.358, "dur": 0.429, + "args": { + "External id": 467923,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096284.594, "dur": 0.443, + "args": { + "External id": 467924,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096285.788, "dur": 0.424, + "args": { + "External id": 467925,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096288.831, "dur": 0.406, + "args": { + "External id": 467926,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096290.007, "dur": 0.210, + "args": { + "External id": 467927,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096291.331, "dur": 0.320, + "args": { + "External id": 467928,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096292.368, "dur": 0.203, + "args": { + "External id": 467929,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096293.304, "dur": 0.210, + "args": { + "External id": 467930,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096294.411, "dur": 0.242, + "args": { + "External id": 467931,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096295.460, "dur": 0.211, + "args": { + "External id": 467932,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096296.431, "dur": 0.219, + "args": { + "External id": 467933,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096300.074, "dur": 0.208, + "args": { + "External id": 467934,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096301.026, "dur": 0.214, + "args": { + "External id": 467935,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096301.963, "dur": 0.204, + "args": { + "External id": 467936,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096303.048, "dur": 0.210, + "args": { + "External id": 467937,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096304.009, "dur": 0.208, + "args": { + "External id": 467938,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096305.137, "dur": 0.202, + "args": { + "External id": 467939,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096306.095, "dur": 0.213, + "args": { + "External id": 467940,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096307.110, "dur": 0.224, + "args": { + "External id": 467941,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096311.011, "dur": 0.207, + "args": { + "External id": 467942,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096311.935, "dur": 0.208, + "args": { + "External id": 467943,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096312.996, "dur": 0.209, + "args": { + "External id": 467944,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096313.944, "dur": 0.214, + "args": { + "External id": 467945,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096315.023, "dur": 0.206, + "args": { + "External id": 467946,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096316.117, "dur": 0.205, + "args": { + "External id": 467947,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096317.186, "dur": 0.211, + "args": { + "External id": 467948,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096318.215, "dur": 0.218, + "args": { + "External id": 467949,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096321.447, "dur": 0.210, + "args": { + "External id": 467950,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096322.478, "dur": 0.208, + "args": { + "External id": 467951,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096323.577, "dur": 0.204, + "args": { + "External id": 467952,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096324.719, "dur": 0.215, + "args": { + "External id": 467953,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096325.751, "dur": 0.194, + "args": { + "External id": 467954,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096326.802, "dur": 0.209, + "args": { + "External id": 467955,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096327.801, "dur": 0.211, + "args": { + "External id": 467956,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096328.742, "dur": 0.236, + "args": { + "External id": 467957,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096332.214, "dur": 0.220, + "args": { + "External id": 467958,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096333.218, "dur": 0.204, + "args": { + "External id": 467959,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096334.477, "dur": 0.234, + "args": { + "External id": 467960,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096335.617, "dur": 0.193, + "args": { + "External id": 467961,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096336.590, "dur": 0.301, + "args": { + "External id": 467962,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096337.654, "dur": 0.355, + "args": { + "External id": 467963,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096339.191, "dur": 0.346, + "args": { + "External id": 467964,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096342.317, "dur": 0.374, + "args": { + "External id": 467965,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096345.472, "dur": 0.369, + "args": { + "External id": 467966,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456096346.623, "dur": 0.350, + "args": { + "External id": 467967,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338707, "tid": 2338707, + "ts": 6267456096404.451, "dur": 1605.902, + "args": { + "External id": 467968,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338707, "tid": 2338707, + "ts": 6267456096851.457, "dur": 1067.784, + "args": { + "External id": 474113,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096861.964, "dur": 12.374, + "args": { + "External id": 474114,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096869.370, "dur": 4.291, + "args": { + "External id": 474115,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096874.890, "dur": 1.563, + "args": { + "External id": 474116,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096875.617, "dur": 0.756, + "args": { + "External id": 474117,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096876.852, "dur": 4.243, + "args": { + "External id": 474118,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096878.707, "dur": 2.139, + "args": { + "External id": 474119,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096881.412, "dur": 1.213, + "args": { + "External id": 474120,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096881.810, "dur": 0.751, + "args": { + "External id": 474121,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096882.954, "dur": 2.193, + "args": { + "External id": 474122,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096884.419, "dur": 0.633, + "args": { + "External id": 474123,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096885.404, "dur": 3.227, + "args": { + "External id": 474124,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096887.707, "dur": 0.860, + "args": { + "External id": 474125,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096890.935, "dur": 1.342, + "args": { + "External id": 474126,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096891.327, "dur": 0.876, + "args": { + "External id": 474127,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096892.594, "dur": 2.343, + "args": { + "External id": 474128,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096893.885, "dur": 0.969, + "args": { + "External id": 474129,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096895.168, "dur": 2.559, + "args": { + "External id": 474130,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096897.037, "dur": 0.615, + "args": { + "External id": 474131,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096897.946, "dur": 1.196, + "args": { + "External id": 474132,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096898.326, "dur": 0.558, + "args": { + "External id": 474133,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096899.385, "dur": 3.734, + "args": { + "External id": 474134,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096900.731, "dur": 2.316, + "args": { + "External id": 474135,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096903.359, "dur": 0.970, + "args": { + "External id": 474136,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096903.704, "dur": 0.555, + "args": { + "External id": 474137,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096904.734, "dur": 2.698, + "args": { + "External id": 474138,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096906.664, "dur": 0.697, + "args": { + "External id": 474139,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096907.654, "dur": 2.859, + "args": { + "External id": 474140,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096909.997, "dur": 0.442, + "args": { + "External id": 474141,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096913.123, "dur": 0.949, + "args": { + "External id": 474142,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096913.487, "dur": 0.516, + "args": { + "External id": 474143,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096914.312, "dur": 2.005, + "args": { + "External id": 474144,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096915.692, "dur": 0.559, + "args": { + "External id": 474145,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096916.545, "dur": 3.069, + "args": { + "External id": 474146,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096918.969, "dur": 0.570, + "args": { + "External id": 474147,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096919.844, "dur": 1.161, + "args": { + "External id": 474148,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096920.193, "dur": 0.748, + "args": { + "External id": 474149,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096921.235, "dur": 4.097, + "args": { + "External id": 474150,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096922.657, "dur": 2.599, + "args": { + "External id": 474151,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096925.595, "dur": 1.192, + "args": { + "External id": 474152,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096925.966, "dur": 0.758, + "args": { + "External id": 474153,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096927.092, "dur": 2.211, + "args": { + "External id": 474154,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096928.597, "dur": 0.635, + "args": { + "External id": 474155,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096929.521, "dur": 2.829, + "args": { + "External id": 474156,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096931.879, "dur": 0.404, + "args": { + "External id": 474157,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096934.937, "dur": 0.922, + "args": { + "External id": 474158,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096935.303, "dur": 0.490, + "args": { + "External id": 474159,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096936.108, "dur": 2.415, + "args": { + "External id": 474160,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096937.574, "dur": 0.875, + "args": { + "External id": 474161,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096938.941, "dur": 2.768, + "args": { + "External id": 474162,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096941.020, "dur": 0.611, + "args": { + "External id": 474163,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096941.931, "dur": 1.350, + "args": { + "External id": 474164,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096942.272, "dur": 0.931, + "args": { + "External id": 474165,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096943.520, "dur": 3.853, + "args": { + "External id": 474166,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096945.083, "dur": 2.130, + "args": { + "External id": 474167,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096947.599, "dur": 1.033, + "args": { + "External id": 474168,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096947.979, "dur": 0.589, + "args": { + "External id": 474169,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096949.203, "dur": 2.143, + "args": { + "External id": 474170,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096950.400, "dur": 0.867, + "args": { + "External id": 474171,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096951.573, "dur": 3.601, + "args": { + "External id": 474172,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096954.443, "dur": 0.658, + "args": { + "External id": 474173,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096957.283, "dur": 1.207, + "args": { + "External id": 474174,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096957.686, "dur": 0.738, + "args": { + "External id": 474175,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096958.708, "dur": 1.950, + "args": { + "External id": 474176,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096959.854, "dur": 0.738, + "args": { + "External id": 474177,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096960.879, "dur": 3.384, + "args": { + "External id": 474178,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096963.601, "dur": 0.585, + "args": { + "External id": 474179,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096964.722, "dur": 0.939, + "args": { + "External id": 474180,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096965.124, "dur": 0.469, + "args": { + "External id": 474181,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096965.883, "dur": 3.991, + "args": { + "External id": 474182,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096967.230, "dur": 2.567, + "args": { + "External id": 474183,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096970.100, "dur": 1.006, + "args": { + "External id": 474184,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096970.454, "dur": 0.585, + "args": { + "External id": 474185,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096971.337, "dur": 2.433, + "args": { + "External id": 474186,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096972.634, "dur": 1.069, + "args": { + "External id": 474187,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096974.041, "dur": 4.226, + "args": { + "External id": 474188,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096977.138, "dur": 1.064, + "args": { + "External id": 474189,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096980.379, "dur": 1.247, + "args": { + "External id": 474190,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096980.759, "dur": 0.792, + "args": { + "External id": 474191,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096981.885, "dur": 2.123, + "args": { + "External id": 474192,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096983.382, "dur": 0.558, + "args": { + "External id": 474193,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096984.286, "dur": 3.099, + "args": { + "External id": 474194,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096986.686, "dur": 0.628, + "args": { + "External id": 474195,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096987.613, "dur": 1.205, + "args": { + "External id": 474196,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096987.966, "dur": 0.788, + "args": { + "External id": 474197,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096989.042, "dur": 3.672, + "args": { + "External id": 474198,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096990.648, "dur": 1.982, + "args": { + "External id": 474199,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096992.943, "dur": 1.395, + "args": { + "External id": 474200,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096993.308, "dur": 0.965, + "args": { + "External id": 474201,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096994.559, "dur": 2.840, + "args": { + "External id": 474202,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456096996.113, "dur": 1.213, + "args": { + "External id": 474203,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456096997.618, "dur": 3.252, + "args": { + "External id": 474204,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097000.022, "dur": 0.784, + "args": { + "External id": 474205,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097003.639, "dur": 1.102, + "args": { + "External id": 474206,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097004.049, "dur": 0.624, + "args": { + "External id": 474207,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097004.961, "dur": 1.815, + "args": { + "External id": 474208,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097006.141, "dur": 0.566, + "args": { + "External id": 474209,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097007.022, "dur": 2.815, + "args": { + "External id": 474210,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097009.224, "dur": 0.539, + "args": { + "External id": 474211,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097010.083, "dur": 1.124, + "args": { + "External id": 474212,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097010.443, "dur": 0.696, + "args": { + "External id": 474213,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097011.546, "dur": 3.314, + "args": { + "External id": 474214,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097012.885, "dur": 1.894, + "args": { + "External id": 474215,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097015.167, "dur": 1.039, + "args": { + "External id": 474216,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097015.561, "dur": 0.565, + "args": { + "External id": 474217,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097016.449, "dur": 2.197, + "args": { + "External id": 474218,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097017.699, "dur": 0.871, + "args": { + "External id": 474219,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097018.867, "dur": 3.162, + "args": { + "External id": 474220,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097021.205, "dur": 0.756, + "args": { + "External id": 474221,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097024.386, "dur": 0.993, + "args": { + "External id": 474222,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097024.745, "dur": 0.567, + "args": { + "External id": 474223,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097025.631, "dur": 4.590, + "args": { + "External id": 474224,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097027.870, "dur": 2.279, + "args": { + "External id": 474225,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097030.448, "dur": 0.906, + "args": { + "External id": 474226,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097030.811, "dur": 0.476, + "args": { + "External id": 474227,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097031.575, "dur": 1.032, + "args": { + "External id": 474228,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097031.918, "dur": 0.627, + "args": { + "External id": 474229,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097032.854, "dur": 3.867, + "args": { + "External id": 474230,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097036.036, "dur": 0.618, + "args": { + "External id": 474231,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097037.039, "dur": 0.851, + "args": { + "External id": 474232,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097037.397, "dur": 0.425, + "args": { + "External id": 474233,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097038.119, "dur": 2.315, + "args": { + "External id": 474234,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097039.813, "dur": 0.554, + "args": { + "External id": 474235,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097040.653, "dur": 2.751, + "args": { + "External id": 474236,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097042.754, "dur": 0.586, + "args": { + "External id": 474237,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097046.486, "dur": 1.270, + "args": { + "External id": 474238,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097046.847, "dur": 0.834, + "args": { + "External id": 474239,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097048.006, "dur": 3.201, + "args": { + "External id": 474240,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097049.313, "dur": 1.820, + "args": { + "External id": 474241,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097051.432, "dur": 26.845, + "args": { + "External id": 474242,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097051.777, "dur": 25.589, + "args": { + "External id": 474243,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097079.949, "dur": 1.704, + "args": { + "External id": 474244,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097080.910, "dur": 0.671, + "args": { + "External id": 474245,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097081.982, "dur": 4.408, + "args": { + "External id": 474246,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097085.664, "dur": 0.648, + "args": { + "External id": 474247,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097086.697, "dur": 1.246, + "args": { + "External id": 474248,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097087.081, "dur": 0.786, + "args": { + "External id": 474249,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097088.198, "dur": 2.235, + "args": { + "External id": 474250,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097089.706, "dur": 0.660, + "args": { + "External id": 474251,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097090.670, "dur": 3.566, + "args": { + "External id": 474252,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097093.520, "dur": 0.649, + "args": { + "External id": 474253,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097097.071, "dur": 0.964, + "args": { + "External id": 474254,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097097.441, "dur": 0.528, + "args": { + "External id": 474255,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097098.288, "dur": 4.158, + "args": { + "External id": 474256,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097100.083, "dur": 2.296, + "args": { + "External id": 474257,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097102.679, "dur": 1.189, + "args": { + "External id": 474258,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097103.028, "dur": 0.780, + "args": { + "External id": 474259,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097104.286, "dur": 1.028, + "args": { + "External id": 474260,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097104.638, "dur": 0.608, + "args": { + "External id": 474261,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097105.724, "dur": 4.208, + "args": { + "External id": 474262,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097109.159, "dur": 0.711, + "args": { + "External id": 474263,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097110.210, "dur": 1.210, + "args": { + "External id": 474264,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097110.574, "dur": 0.769, + "args": { + "External id": 474265,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097111.646, "dur": 1.901, + "args": { + "External id": 474266,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097112.858, "dur": 0.625, + "args": { + "External id": 474267,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097113.845, "dur": 2.736, + "args": { + "External id": 474268,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097115.952, "dur": 0.555, + "args": { + "External id": 474269,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097119.152, "dur": 0.860, + "args": { + "External id": 474270,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097119.514, "dur": 0.417, + "args": { + "External id": 474271,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097120.236, "dur": 3.373, + "args": { + "External id": 474272,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097121.293, "dur": 2.156, + "args": { + "External id": 474273,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097123.860, "dur": 1.052, + "args": { + "External id": 474274,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097124.207, "dur": 0.627, + "args": { + "External id": 474275,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097125.133, "dur": 1.144, + "args": { + "External id": 474276,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097125.483, "dur": 0.731, + "args": { + "External id": 474277,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097126.701, "dur": 4.545, + "args": { + "External id": 474278,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097130.255, "dur": 0.925, + "args": { + "External id": 474279,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097131.523, "dur": 1.366, + "args": { + "External id": 474280,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097131.911, "dur": 0.914, + "args": { + "External id": 474281,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097133.114, "dur": 2.329, + "args": { + "External id": 474282,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097134.466, "dur": 0.917, + "args": { + "External id": 474283,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097135.887, "dur": 2.911, + "args": { + "External id": 474284,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097138.097, "dur": 0.637, + "args": { + "External id": 474285,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097141.381, "dur": 0.933, + "args": { + "External id": 474286,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097141.755, "dur": 0.495, + "args": { + "External id": 474287,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097142.536, "dur": 3.245, + "args": { + "External id": 474288,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097143.433, "dur": 2.138, + "args": { + "External id": 474289,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097146.013, "dur": 0.966, + "args": { + "External id": 474290,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097146.369, "dur": 0.537, + "args": { + "External id": 474291,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097147.213, "dur": 1.671, + "args": { + "External id": 474292,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097147.820, "dur": 0.997, + "args": { + "External id": 474293,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097149.138, "dur": 4.251, + "args": { + "External id": 474294,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097152.693, "dur": 0.628, + "args": { + "External id": 474295,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097153.656, "dur": 1.698, + "args": { + "External id": 474296,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097154.734, "dur": 0.560, + "args": { + "External id": 474297,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097155.705, "dur": 1.394, + "args": { + "External id": 474298,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097156.379, "dur": 0.655, + "args": { + "External id": 474299,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097157.485, "dur": 3.163, + "args": { + "External id": 474300,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097160.057, "dur": 0.530, + "args": { + "External id": 474301,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097163.007, "dur": 1.709, + "args": { + "External id": 474302,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097164.030, "dur": 0.611, + "args": { + "External id": 474303,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097164.963, "dur": 2.878, + "args": { + "External id": 474304,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097165.973, "dur": 1.794, + "args": { + "External id": 474305,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097168.086, "dur": 1.537, + "args": { + "External id": 474306,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097168.970, "dur": 0.591, + "args": { + "External id": 474307,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097169.863, "dur": 2.516, + "args": { + "External id": 474308,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097171.370, "dur": 0.944, + "args": { + "External id": 474309,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097172.621, "dur": 3.970, + "args": { + "External id": 474310,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097175.832, "dur": 0.694, + "args": { + "External id": 474311,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097176.870, "dur": 2.307, + "args": { + "External id": 474312,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097178.252, "dur": 0.864, + "args": { + "External id": 474313,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097179.679, "dur": 1.771, + "args": { + "External id": 474314,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097180.706, "dur": 0.681, + "args": { + "External id": 474315,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097181.693, "dur": 3.902, + "args": { + "External id": 474316,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097184.833, "dur": 0.698, + "args": { + "External id": 474317,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097188.243, "dur": 2.150, + "args": { + "External id": 474318,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097189.698, "dur": 0.629, + "args": { + "External id": 474319,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097190.653, "dur": 3.581, + "args": { + "External id": 474320,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097191.970, "dur": 2.010, + "args": { + "External id": 474321,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097194.513, "dur": 1.407, + "args": { + "External id": 474322,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097195.252, "dur": 0.595, + "args": { + "External id": 474323,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097196.190, "dur": 1.827, + "args": { + "External id": 474324,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097197.157, "dur": 0.795, + "args": { + "External id": 474325,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097198.397, "dur": 2.945, + "args": { + "External id": 474326,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097200.854, "dur": 0.426, + "args": { + "External id": 474327,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097201.606, "dur": 1.501, + "args": { + "External id": 474328,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097202.552, "dur": 0.495, + "args": { + "External id": 474329,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097203.352, "dur": 1.581, + "args": { + "External id": 474330,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097204.206, "dur": 0.666, + "args": { + "External id": 474331,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097205.176, "dur": 3.590, + "args": { + "External id": 474332,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097207.945, "dur": 0.748, + "args": { + "External id": 474333,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097211.210, "dur": 1.869, + "args": { + "External id": 474334,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097212.394, "dur": 0.622, + "args": { + "External id": 474335,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097213.321, "dur": 3.191, + "args": { + "External id": 474336,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097214.413, "dur": 2.033, + "args": { + "External id": 474337,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097216.759, "dur": 1.571, + "args": { + "External id": 474338,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097217.417, "dur": 0.848, + "args": { + "External id": 474339,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097218.799, "dur": 1.995, + "args": { + "External id": 474340,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097219.674, "dur": 1.058, + "args": { + "External id": 474341,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097221.036, "dur": 3.185, + "args": { + "External id": 474342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097223.542, "dur": 0.619, + "args": { + "External id": 474343,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097224.495, "dur": 2.727, + "args": { + "External id": 474344,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097226.301, "dur": 0.848, + "args": { + "External id": 474345,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097227.468, "dur": 1.825, + "args": { + "External id": 474346,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097228.140, "dur": 1.091, + "args": { + "External id": 474347,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097229.532, "dur": 4.185, + "args": { + "External id": 474348,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097232.558, "dur": 1.095, + "args": { + "External id": 474349,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097236.307, "dur": 1.404, + "args": { + "External id": 474350,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097237.054, "dur": 0.593, + "args": { + "External id": 474351,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097237.952, "dur": 3.276, + "args": { + "External id": 474352,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097239.026, "dur": 2.134, + "args": { + "External id": 474353,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097241.487, "dur": 1.926, + "args": { + "External id": 474354,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097242.586, "dur": 0.762, + "args": { + "External id": 474355,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097243.654, "dur": 2.025, + "args": { + "External id": 474356,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097244.903, "dur": 0.711, + "args": { + "External id": 474357,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097246.145, "dur": 4.055, + "args": { + "External id": 474358,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097249.402, "dur": 0.735, + "args": { + "External id": 474359,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097250.479, "dur": 1.401, + "args": { + "External id": 474360,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097251.279, "dur": 0.535, + "args": { + "External id": 474361,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097252.117, "dur": 1.523, + "args": { + "External id": 474362,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097252.890, "dur": 0.687, + "args": { + "External id": 474363,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097253.886, "dur": 3.447, + "args": { + "External id": 474364,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097256.467, "dur": 0.787, + "args": { + "External id": 474365,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097260.660, "dur": 1.752, + "args": { + "External id": 474366,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097261.627, "dur": 0.712, + "args": { + "External id": 474367,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097262.680, "dur": 3.343, + "args": { + "External id": 474368,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097263.811, "dur": 2.142, + "args": { + "External id": 474369,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097266.279, "dur": 1.534, + "args": { + "External id": 474370,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097266.872, "dur": 0.876, + "args": { + "External id": 474371,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097268.164, "dur": 2.142, + "args": { + "External id": 474372,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097269.507, "dur": 0.736, + "args": { + "External id": 474373,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097270.574, "dur": 2.812, + "args": { + "External id": 474374,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097272.745, "dur": 0.581, + "args": { + "External id": 474375,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097273.661, "dur": 1.782, + "args": { + "External id": 474376,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097274.835, "dur": 0.548, + "args": { + "External id": 474377,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097275.681, "dur": 1.877, + "args": { + "External id": 474378,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097276.891, "dur": 0.596, + "args": { + "External id": 474379,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097277.826, "dur": 3.877, + "args": { + "External id": 474380,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097281.038, "dur": 0.599, + "args": { + "External id": 474381,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097284.955, "dur": 1.801, + "args": { + "External id": 474382,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097286.023, "dur": 0.669, + "args": { + "External id": 474383,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097287.001, "dur": 3.821, + "args": { + "External id": 474384,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097288.243, "dur": 2.502, + "args": { + "External id": 474385,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097291.078, "dur": 1.292, + "args": { + "External id": 474386,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097291.777, "dur": 0.530, + "args": { + "External id": 474387,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097292.622, "dur": 1.970, + "args": { + "External id": 474388,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097293.787, "dur": 0.741, + "args": { + "External id": 474389,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097294.847, "dur": 3.300, + "args": { + "External id": 474390,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097297.307, "dur": 0.768, + "args": { + "External id": 474391,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097298.408, "dur": 1.530, + "args": { + "External id": 474392,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097299.407, "dur": 0.463, + "args": { + "External id": 474393,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097300.183, "dur": 1.352, + "args": { + "External id": 474394,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097300.856, "dur": 0.616, + "args": { + "External id": 474395,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097301.787, "dur": 3.656, + "args": { + "External id": 474396,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097304.806, "dur": 0.570, + "args": { + "External id": 474397,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097307.791, "dur": 1.856, + "args": { + "External id": 474398,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097308.884, "dur": 0.697, + "args": { + "External id": 474399,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097309.887, "dur": 3.109, + "args": { + "External id": 474400,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097311.155, "dur": 1.770, + "args": { + "External id": 474401,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097313.241, "dur": 1.424, + "args": { + "External id": 474402,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097314.023, "dur": 0.580, + "args": { + "External id": 474403,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097314.905, "dur": 1.875, + "args": { + "External id": 474404,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097316.083, "dur": 0.620, + "args": { + "External id": 474405,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097317.070, "dur": 3.324, + "args": { + "External id": 474406,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097319.812, "dur": 0.511, + "args": { + "External id": 474407,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097320.664, "dur": 2.239, + "args": { + "External id": 474408,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097322.005, "dur": 0.838, + "args": { + "External id": 474409,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097323.159, "dur": 2.239, + "args": { + "External id": 474410,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097324.159, "dur": 1.175, + "args": { + "External id": 474411,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097325.692, "dur": 4.853, + "args": { + "External id": 474412,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097328.865, "dur": 1.602, + "args": { + "External id": 474413,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097334.328, "dur": 1.726, + "args": { + "External id": 474414,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097335.421, "dur": 0.558, + "args": { + "External id": 474415,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097336.294, "dur": 3.065, + "args": { + "External id": 474416,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097337.481, "dur": 1.811, + "args": { + "External id": 474417,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097339.653, "dur": 1.484, + "args": { + "External id": 474418,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097340.447, "dur": 0.612, + "args": { + "External id": 474419,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097341.379, "dur": 2.331, + "args": { + "External id": 474420,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097342.893, "dur": 0.746, + "args": { + "External id": 474421,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097343.970, "dur": 4.508, + "args": { + "External id": 474422,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097347.294, "dur": 1.116, + "args": { + "External id": 474423,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097348.740, "dur": 2.267, + "args": { + "External id": 474424,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097350.460, "dur": 0.482, + "args": { + "External id": 474425,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097351.250, "dur": 1.497, + "args": { + "External id": 474426,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097351.967, "dur": 0.714, + "args": { + "External id": 474427,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097353.109, "dur": 4.250, + "args": { + "External id": 474428,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097356.479, "dur": 0.815, + "args": { + "External id": 474429,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097359.206, "dur": 1.716, + "args": { + "External id": 474430,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097360.113, "dur": 0.729, + "args": { + "External id": 474431,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097361.337, "dur": 3.562, + "args": { + "External id": 474432,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097362.449, "dur": 2.098, + "args": { + "External id": 474433,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097365.187, "dur": 1.711, + "args": { + "External id": 474434,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097366.069, "dur": 0.765, + "args": { + "External id": 474435,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097367.155, "dur": 2.180, + "args": { + "External id": 474436,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097368.536, "dur": 0.735, + "args": { + "External id": 474437,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097369.637, "dur": 3.350, + "args": { + "External id": 474438,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097372.347, "dur": 0.565, + "args": { + "External id": 474439,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097373.302, "dur": 1.692, + "args": { + "External id": 474440,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097374.133, "dur": 0.800, + "args": { + "External id": 474441,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097375.463, "dur": 2.290, + "args": { + "External id": 474442,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097376.423, "dur": 1.267, + "args": { + "External id": 474443,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097378.027, "dur": 3.883, + "args": { + "External id": 474444,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097381.180, "dur": 0.669, + "args": { + "External id": 474445,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097383.965, "dur": 2.317, + "args": { + "External id": 474446,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097385.009, "dur": 1.205, + "args": { + "External id": 474447,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097386.538, "dur": 3.183, + "args": { + "External id": 474448,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097387.449, "dur": 2.100, + "args": { + "External id": 474449,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097389.967, "dur": 1.658, + "args": { + "External id": 474450,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097390.665, "dur": 0.894, + "args": { + "External id": 474451,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097391.917, "dur": 2.118, + "args": { + "External id": 474452,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097393.020, "dur": 0.950, + "args": { + "External id": 474453,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097394.280, "dur": 3.829, + "args": { + "External id": 474454,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097397.285, "dur": 0.746, + "args": { + "External id": 474455,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097398.381, "dur": 2.640, + "args": { + "External id": 474456,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097400.213, "dur": 0.748, + "args": { + "External id": 474457,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097401.264, "dur": 2.083, + "args": { + "External id": 474458,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097402.257, "dur": 1.023, + "args": { + "External id": 474459,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097403.587, "dur": 3.537, + "args": { + "External id": 474460,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097406.527, "dur": 0.528, + "args": { + "External id": 474461,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097409.660, "dur": 1.307, + "args": { + "External id": 474462,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097410.287, "dur": 0.616, + "args": { + "External id": 474463,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097411.230, "dur": 3.173, + "args": { + "External id": 474464,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097412.322, "dur": 2.015, + "args": { + "External id": 474465,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097414.647, "dur": 1.313, + "args": { + "External id": 474466,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097415.294, "dur": 0.603, + "args": { + "External id": 474467,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097416.201, "dur": 1.788, + "args": { + "External id": 474468,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097417.167, "dur": 0.755, + "args": { + "External id": 474469,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097418.234, "dur": 3.255, + "args": { + "External id": 474470,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097420.730, "dur": 0.691, + "args": { + "External id": 474471,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097421.758, "dur": 1.728, + "args": { + "External id": 474472,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097422.861, "dur": 0.565, + "args": { + "External id": 474473,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097423.874, "dur": 1.356, + "args": { + "External id": 474474,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097424.553, "dur": 0.614, + "args": { + "External id": 474475,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097425.552, "dur": 4.194, + "args": { + "External id": 474476,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097429.216, "dur": 0.465, + "args": { + "External id": 474477,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097431.956, "dur": 1.577, + "args": { + "External id": 474478,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097432.667, "dur": 0.800, + "args": { + "External id": 474479,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097433.794, "dur": 4.958, + "args": { + "External id": 474480,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097436.018, "dur": 2.666, + "args": { + "External id": 474481,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097438.993, "dur": 2.001, + "args": { + "External id": 474482,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097439.667, "dur": 1.247, + "args": { + "External id": 474483,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097441.543, "dur": 2.185, + "args": { + "External id": 474484,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097442.740, "dur": 0.924, + "args": { + "External id": 474485,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097443.989, "dur": 3.298, + "args": { + "External id": 474486,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097446.635, "dur": 0.588, + "args": { + "External id": 474487,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097447.645, "dur": 1.552, + "args": { + "External id": 474488,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097448.566, "dur": 0.566, + "args": { + "External id": 474489,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097449.437, "dur": 1.791, + "args": { + "External id": 474490,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097450.297, "dur": 0.866, + "args": { + "External id": 474491,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097451.505, "dur": 3.795, + "args": { + "External id": 474492,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097454.412, "dur": 0.811, + "args": { + "External id": 474493,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097457.349, "dur": 1.670, + "args": { + "External id": 474494,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097458.246, "dur": 0.697, + "args": { + "External id": 474495,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097459.261, "dur": 3.667, + "args": { + "External id": 474496,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097460.432, "dur": 2.222, + "args": { + "External id": 474497,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097463.177, "dur": 1.806, + "args": { + "External id": 474498,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097464.212, "dur": 0.707, + "args": { + "External id": 474499,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097465.427, "dur": 2.240, + "args": { + "External id": 474500,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097466.887, "dur": 0.708, + "args": { + "External id": 474501,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097467.905, "dur": 3.222, + "args": { + "External id": 474502,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097470.598, "dur": 0.465, + "args": { + "External id": 474503,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097471.454, "dur": 1.662, + "args": { + "External id": 474504,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097472.574, "dur": 0.483, + "args": { + "External id": 474505,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097473.373, "dur": 1.952, + "args": { + "External id": 474506,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097474.547, "dur": 0.714, + "args": { + "External id": 474507,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097475.670, "dur": 4.024, + "args": { + "External id": 474508,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097478.683, "dur": 0.942, + "args": { + "External id": 474509,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097482.203, "dur": 1.598, + "args": { + "External id": 474510,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097483.041, "dur": 0.696, + "args": { + "External id": 474511,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097484.037, "dur": 3.804, + "args": { + "External id": 474512,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097485.205, "dur": 2.562, + "args": { + "External id": 474513,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097488.107, "dur": 1.705, + "args": { + "External id": 474514,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097489.049, "dur": 0.685, + "args": { + "External id": 474515,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097490.069, "dur": 2.323, + "args": { + "External id": 474516,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097491.527, "dur": 0.800, + "args": { + "External id": 474517,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097492.720, "dur": 3.556, + "args": { + "External id": 474518,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097495.694, "dur": 0.520, + "args": { + "External id": 474519,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097496.550, "dur": 2.164, + "args": { + "External id": 474520,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097497.755, "dur": 0.879, + "args": { + "External id": 474521,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097499.154, "dur": 1.786, + "args": { + "External id": 474522,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097499.974, "dur": 0.900, + "args": { + "External id": 474523,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097501.212, "dur": 4.530, + "args": { + "External id": 474524,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097505.174, "dur": 0.507, + "args": { + "External id": 474525,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097508.385, "dur": 1.808, + "args": { + "External id": 474526,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097509.187, "dur": 0.937, + "args": { + "External id": 474527,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097510.452, "dur": 3.719, + "args": { + "External id": 474528,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097511.840, "dur": 2.131, + "args": { + "External id": 474529,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097514.446, "dur": 1.983, + "args": { + "External id": 474530,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097515.487, "dur": 0.872, + "args": { + "External id": 474531,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097516.872, "dur": 1.926, + "args": { + "External id": 474532,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097517.825, "dur": 0.910, + "args": { + "External id": 474533,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097519.275, "dur": 2.681, + "args": { + "External id": 474534,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097521.448, "dur": 0.443, + "args": { + "External id": 474535,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097522.219, "dur": 1.924, + "args": { + "External id": 474536,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097523.426, "dur": 0.655, + "args": { + "External id": 474537,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097524.399, "dur": 1.933, + "args": { + "External id": 474538,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097525.390, "dur": 0.871, + "args": { + "External id": 474539,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097526.599, "dur": 3.479, + "args": { + "External id": 474540,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097529.573, "dur": 0.428, + "args": { + "External id": 474541,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097532.243, "dur": 1.563, + "args": { + "External id": 474542,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097533.128, "dur": 0.614, + "args": { + "External id": 474543,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097534.177, "dur": 3.293, + "args": { + "External id": 474544,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097535.351, "dur": 2.051, + "args": { + "External id": 474545,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097537.721, "dur": 1.889, + "args": { + "External id": 474546,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097538.808, "dur": 0.739, + "args": { + "External id": 474547,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097540.045, "dur": 1.896, + "args": { + "External id": 474548,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097541.074, "dur": 0.800, + "args": { + "External id": 474549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097542.188, "dur": 3.308, + "args": { + "External id": 474550,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097544.828, "dur": 0.598, + "args": { + "External id": 474551,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097545.766, "dur": 2.086, + "args": { + "External id": 474552,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097547.189, "dur": 0.599, + "args": { + "External id": 474553,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097548.279, "dur": 1.726, + "args": { + "External id": 474554,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097549.141, "dur": 0.800, + "args": { + "External id": 474555,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097550.254, "dur": 4.155, + "args": { + "External id": 474556,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097553.334, "dur": 1.006, + "args": { + "External id": 474557,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097557.149, "dur": 1.568, + "args": { + "External id": 474558,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097557.874, "dur": 0.779, + "args": { + "External id": 474559,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097559.138, "dur": 2.970, + "args": { + "External id": 474560,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097560.128, "dur": 1.909, + "args": { + "External id": 474561,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097562.368, "dur": 1.433, + "args": { + "External id": 474562,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097563.098, "dur": 0.634, + "args": { + "External id": 474563,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097564.073, "dur": 1.815, + "args": { + "External id": 474564,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097565.203, "dur": 0.619, + "args": { + "External id": 474565,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097566.301, "dur": 3.828, + "args": { + "External id": 474566,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097569.670, "dur": 0.397, + "args": { + "External id": 474567,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097570.401, "dur": 2.186, + "args": { + "External id": 474568,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097571.930, "dur": 0.580, + "args": { + "External id": 474569,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097572.841, "dur": 1.584, + "args": { + "External id": 474570,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097573.621, "dur": 0.738, + "args": { + "External id": 474571,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097574.661, "dur": 4.069, + "args": { + "External id": 474572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097577.921, "dur": 0.743, + "args": { + "External id": 474573,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097581.089, "dur": 5.179, + "args": { + "External id": 474574,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097584.991, "dur": 1.123, + "args": { + "External id": 474575,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097586.532, "dur": 3.314, + "args": { + "External id": 474576,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097587.854, "dur": 1.916, + "args": { + "External id": 474577,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097590.109, "dur": 13.220, + "args": { + "External id": 474578,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097590.872, "dur": 11.822, + "args": { + "External id": 474579,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097604.743, "dur": 2.573, + "args": { + "External id": 474580,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097606.220, "dur": 1.030, + "args": { + "External id": 474581,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097607.634, "dur": 3.487, + "args": { + "External id": 474582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097610.144, "dur": 0.911, + "args": { + "External id": 474583,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097611.433, "dur": 1.690, + "args": { + "External id": 474584,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097612.524, "dur": 0.537, + "args": { + "External id": 474585,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097613.368, "dur": 1.866, + "args": { + "External id": 474586,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097614.530, "dur": 0.638, + "args": { + "External id": 474587,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097615.711, "dur": 3.706, + "args": { + "External id": 474588,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097618.710, "dur": 0.640, + "args": { + "External id": 474589,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097621.161, "dur": 1.155, + "args": { + "External id": 474590,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097621.794, "dur": 0.460, + "args": { + "External id": 474591,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097622.574, "dur": 3.288, + "args": { + "External id": 474592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097623.810, "dur": 1.786, + "args": { + "External id": 474593,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097626.150, "dur": 1.624, + "args": { + "External id": 474594,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097627.080, "dur": 0.623, + "args": { + "External id": 474595,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097628.105, "dur": 2.242, + "args": { + "External id": 474596,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097629.577, "dur": 0.702, + "args": { + "External id": 474597,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097630.683, "dur": 3.127, + "args": { + "External id": 474598,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097633.003, "dur": 0.737, + "args": { + "External id": 474599,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097634.165, "dur": 1.992, + "args": { + "External id": 474600,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097635.515, "dur": 0.582, + "args": { + "External id": 474601,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097636.406, "dur": 1.699, + "args": { + "External id": 474602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097637.390, "dur": 0.644, + "args": { + "External id": 474603,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097638.377, "dur": 4.245, + "args": { + "External id": 474604,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097642.032, "dur": 0.520, + "args": { + "External id": 474605,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097644.628, "dur": 1.759, + "args": { + "External id": 474606,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097645.556, "dur": 0.761, + "args": { + "External id": 474607,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097646.683, "dur": 3.043, + "args": { + "External id": 474608,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097647.857, "dur": 1.800, + "args": { + "External id": 474609,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097650.021, "dur": 2.160, + "args": { + "External id": 474610,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097651.253, "dur": 0.865, + "args": { + "External id": 474611,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097652.663, "dur": 2.128, + "args": { + "External id": 474612,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097653.906, "dur": 0.821, + "args": { + "External id": 474613,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097655.239, "dur": 3.129, + "args": { + "External id": 474614,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097657.659, "dur": 0.644, + "args": { + "External id": 474615,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097658.664, "dur": 1.711, + "args": { + "External id": 474616,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097659.833, "dur": 0.476, + "args": { + "External id": 474617,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097660.657, "dur": 2.273, + "args": { + "External id": 474618,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097661.723, "dur": 0.903, + "args": { + "External id": 474619,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097663.188, "dur": 3.286, + "args": { + "External id": 474620,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097665.750, "dur": 0.647, + "args": { + "External id": 474621,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097669.188, "dur": 1.611, + "args": { + "External id": 474622,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097670.187, "dur": 0.548, + "args": { + "External id": 474623,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097671.077, "dur": 26.788, + "args": { + "External id": 474624,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097695.968, "dur": 1.800, + "args": { + "External id": 474625,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097698.275, "dur": 1.344, + "args": { + "External id": 474626,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097698.968, "dur": 0.583, + "args": { + "External id": 474627,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097699.903, "dur": 2.325, + "args": { + "External id": 474628,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097701.382, "dur": 0.775, + "args": { + "External id": 474629,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097702.486, "dur": 3.506, + "args": { + "External id": 474630,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097705.188, "dur": 0.739, + "args": { + "External id": 474631,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097706.308, "dur": 1.751, + "args": { + "External id": 474632,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097707.380, "dur": 0.615, + "args": { + "External id": 474633,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097708.403, "dur": 1.617, + "args": { + "External id": 474634,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097709.268, "dur": 0.686, + "args": { + "External id": 474635,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097710.301, "dur": 3.449, + "args": { + "External id": 474636,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097713.001, "dur": 0.687, + "args": { + "External id": 474637,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097716.415, "dur": 1.460, + "args": { + "External id": 474638,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097717.041, "dur": 0.763, + "args": { + "External id": 474639,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097718.119, "dur": 3.424, + "args": { + "External id": 474640,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097719.565, "dur": 1.896, + "args": { + "External id": 474641,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097721.831, "dur": 1.677, + "args": { + "External id": 474642,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097722.488, "dur": 0.958, + "args": { + "External id": 474643,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097723.841, "dur": 1.927, + "args": { + "External id": 474644,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097724.908, "dur": 0.792, + "args": { + "External id": 474645,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097726.141, "dur": 2.764, + "args": { + "External id": 474646,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097728.461, "dur": 0.376, + "args": { + "External id": 474647,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097729.184, "dur": 1.666, + "args": { + "External id": 474648,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097730.280, "dur": 0.495, + "args": { + "External id": 474649,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097731.136, "dur": 1.731, + "args": { + "External id": 474650,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097731.988, "dur": 0.809, + "args": { + "External id": 474651,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097733.159, "dur": 3.703, + "args": { + "External id": 474652,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097736.190, "dur": 0.607, + "args": { + "External id": 474653,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097739.130, "dur": 1.725, + "args": { + "External id": 474654,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097740.091, "dur": 0.598, + "args": { + "External id": 474655,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097741.148, "dur": 3.193, + "args": { + "External id": 474656,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097742.409, "dur": 1.859, + "args": { + "External id": 474657,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338707, "tid": 2338707, + "ts": 6267456097744.603, "dur": 1.354, + "args": { + "External id": 474658,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456097745.295, "dur": 0.599, + "args": { + "External id": 474659,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338707, "tid": 2338707, + "ts": 6267456097766.119, "dur": 141.783, + "args": { + "External id": 474660,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2338707, "tid": 2338707, + "ts": 6267456098025.286, "dur": 190.588, + "args": { + "External id": 474661,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[273], [], [], [], []], "Ev Idx": 21609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2338707, "tid": 2338707, + "ts": 6267456098128.885, "dur": 63.093, + "args": { + "External id": 474662,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[273], [], [], [], []], "Ev Idx": 21610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338707, "tid": 2338707, + "ts": 6267456098148.148, "dur": 1.801, + "args": { + "External id": 474663,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Redistribute", "pid": 2338707, "tid": 2338707, + "ts": 6267456098670.251, "dur": 1201.493, + "args": { + "External id": 474664,"Sequence number": 5036830, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "False"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338707, "tid": 2338707, + "ts": 6267456098734.914, "dur": 71.204, + "args": { + "External id": 474665,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456098741.238, "dur": 1.966, + "args": { + "External id": 474666,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456098746.451, "dur": 0.870, + "args": { + "External id": 474667,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::all_reduce", "pid": 2338707, "tid": 2338707, + "ts": 6267456098881.532, "dur": 591.050, + "args": { + "External id": 474668,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["float", "", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338707, "tid": 2338707, + "ts": 6267456098887.278, "dur": 65.933, + "args": { + "External id": 474669,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338707, "tid": 2338707, + "ts": 6267456098893.205, "dur": 10.516, + "args": { + "External id": 474670,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "0"], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267456098897.704, "dur": 5.015, + "args": { + "External id": 474671,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338707, "tid": 2338707, + "ts": 6267456098905.872, "dur": 46.550, + "args": { + "External id": 474672,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 2338707, "tid": 2338707, + "ts": 6267456098965.128, "dur": 502.548, + "args": { + "External id": 474673,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "-1"], "Input type": ["TensorList", "", "", "", "Scalar"], "Input Strides": [[[]], [], [], [], []], "Input Dims": [[[]], [], [], [], []], "Ev Idx": 21621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267456099008.256, "dur": 450.165, + "args": { + "External id": 474674,"Record function id": 0, "Collective name": "allreduce", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 1, "Input Strides": [[[]], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "1", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1, "Process Group Name": "0", "Input type": ["TensorList", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[[]], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 21622, "In msg nelems": 1 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 2338707, "tid": 2338707, + "ts": 6267456099026.758, "dur": 423.997, + "args": { + "External id": 474675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338707, "tid": 2338707, + "ts": 6267456099557.167, "dur": 270.439, + "args": { + "External id": 474676,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::wait_tensor", "pid": 2338707, "tid": 2338707, + "ts": 6267456099698.048, "dur": 44.457, + "args": { + "External id": 474677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338707, "tid": 2338707, + "ts": 6267456099723.301, "dur": 5.853, + "args": { + "External id": 474678,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "1", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 21626, "In msg nelems": 0, "Rank": 1, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338707, "tid": 2338707, + "ts": 6267456099768.518, "dur": 52.469, + "args": { + "External id": 474679,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456099771.435, "dur": 1.519, + "args": { + "External id": 474680,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456099774.615, "dur": 0.560, + "args": { + "External id": 474681,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_ToTorchTensor", "pid": 2338707, "tid": 2338707, + "ts": 6267456099890.616, "dur": 25.630, + "args": { + "External id": 474682,"Sequence number": 5036831, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338707, "tid": 2338707, + "ts": 6267456099902.184, "dur": 10.983, + "args": { + "External id": 474683,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338707, "tid": 2338707, + "ts": 6267456099905.717, "dur": 7.217, + "args": { + "External id": 474684,"Record function id": 0, "Concrete Inputs": ["", "[]"], "Input type": ["float", "ScalarList"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338707, "tid": 2338707, + "ts": 6267456100321.613, "dur": 39.311, + "args": { + "External id": 474685,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "double", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reciprocal", "pid": 2338707, "tid": 2338707, + "ts": 6267456100372.493, "dur": 22.451, + "args": { + "External id": 474686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 2338707, "tid": 2338707, + "ts": 6267456100401.743, "dur": 22.001, + "args": { + "External id": 474687,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "double"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 2338707, "tid": 2338707, + "ts": 6267456100437.921, "dur": 23.700, + "args": { + "External id": 474688,"Record function id": 0, "Concrete Inputs": ["", "", "1."], "Input type": ["float", "", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456100440.998, "dur": 0.618, + "args": { + "External id": 474689,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338707, "tid": 2338707, + "ts": 6267456100479.768, "dur": 0.590, + "args": { + "External id": 474690,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2338707, "tid": 2338707, + "ts": 6267456100633.879, "dur": 1150.121, + "args": { + "External id": 474691,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2338707, "tid": 2338707, + "ts": 6267456101094.697, "dur": 644.008, + "args": { + "External id": 474692,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isnan", "pid": 2338707, "tid": 2338707, + "ts": 6267456101839.412, "dur": 37.181, + "args": { + "External id": 474693,"Sequence number": 5036832, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338707, "tid": 2338707, + "ts": 6267456101843.876, "dur": 32.138, + "args": { + "External id": 474694,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338707, "tid": 2338707, + "ts": 6267456101882.686, "dur": 6071.141, + "args": { + "External id": 474695,"Sequence number": 5036832, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2338707, + "ts": 6267456101885.060, "dur": 6067.770, + "args": { + "External id": 474696,"Sequence number": 5036832, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2338707, + "ts": 6267456101888.080, "dur": 6060.357, + "args": { + "External id": 474697,"Sequence number": 5036832, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isinf", "pid": 2338707, "tid": 2338707, + "ts": 6267456107977.111, "dur": 138.112, + "args": { + "External id": 474698,"Sequence number": 5036832, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267456107983.491, "dur": 94.111, + "args": { + "External id": 474699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338707, "tid": 2338707, + "ts": 6267456107998.022, "dur": 6.308, + "args": { + "External id": 474700,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338707, "tid": 2338707, + "ts": 6267456108010.140, "dur": 66.651, + "args": { + "External id": 474701,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], [1]], "Input Dims": [[], [0]], "Ev Idx": 21649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338707, "tid": 2338707, + "ts": 6267456108017.976, "dur": 4.608, + "args": { + "External id": 474702,"Record function id": 0, "Concrete Inputs": ["", "[]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 21650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338707, "tid": 2338707, + "ts": 6267456108080.905, "dur": 33.239, + "args": { + "External id": 474703,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338707, "tid": 2338707, + "ts": 6267456108120.214, "dur": 47.385, + "args": { + "External id": 474704,"Sequence number": 5036832, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338707, "tid": 2338707, + "ts": 6267456108121.750, "dur": 45.601, + "args": { + "External id": 474705,"Sequence number": 5036832, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338707, "tid": 2338707, + "ts": 6267456108123.431, "dur": 43.544, + "args": { + "External id": 474706,"Sequence number": 5036832, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21654 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#OptimizersContainer.step", "pid": 2338707, "tid": 2338707, + "ts": 6267456108228.012, "dur": 5981.896, + "args": { + "External id": 474707,"Record function id": 0, "Ev Idx": 21655 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#AdamW.step", "pid": 2338707, "tid": 2338707, + "ts": 6267456108276.402, "dur": 5910.144, + "args": { + "External id": 474708,"Record function id": 0, "Ev Idx": 21656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_add_", "pid": 2338707, "tid": 2338707, + "ts": 6267456109678.808, "dur": 222.611, + "args": { + "External id": 474709,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109698.767, "dur": 1.303, + "args": { + "External id": 474710,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109701.696, "dur": 0.086, + "args": { + "External id": 474711,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109702.259, "dur": 0.350, + "args": { + "External id": 474712,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109702.940, "dur": 0.056, + "args": { + "External id": 474713,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109703.386, "dur": 0.389, + "args": { + "External id": 474714,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109704.342, "dur": 0.072, + "args": { + "External id": 474715,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109704.705, "dur": 0.286, + "args": { + "External id": 474716,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109705.307, "dur": 0.272, + "args": { + "External id": 474717,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109705.840, "dur": 0.068, + "args": { + "External id": 474718,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109706.436, "dur": 0.090, + "args": { + "External id": 474719,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109706.858, "dur": 0.097, + "args": { + "External id": 474720,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109707.421, "dur": 0.052, + "args": { + "External id": 474721,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109707.730, "dur": 0.052, + "args": { + "External id": 474722,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109708.254, "dur": 0.048, + "args": { + "External id": 474723,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109708.848, "dur": 0.069, + "args": { + "External id": 474724,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109709.422, "dur": 0.060, + "args": { + "External id": 474725,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109709.783, "dur": 0.053, + "args": { + "External id": 474726,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109710.395, "dur": 0.057, + "args": { + "External id": 474727,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109710.716, "dur": 0.051, + "args": { + "External id": 474728,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109711.179, "dur": 0.069, + "args": { + "External id": 474729,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109711.552, "dur": 0.050, + "args": { + "External id": 474730,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109711.969, "dur": 0.056, + "args": { + "External id": 474731,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109712.284, "dur": 0.048, + "args": { + "External id": 474732,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109712.778, "dur": 0.461, + "args": { + "External id": 474733,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109713.707, "dur": 0.055, + "args": { + "External id": 474734,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109714.219, "dur": 0.229, + "args": { + "External id": 474735,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109714.917, "dur": 0.299, + "args": { + "External id": 474736,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109715.674, "dur": 0.057, + "args": { + "External id": 474737,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109716.176, "dur": 0.073, + "args": { + "External id": 474738,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109716.543, "dur": 0.069, + "args": { + "External id": 474739,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109716.883, "dur": 0.055, + "args": { + "External id": 474740,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109717.332, "dur": 0.057, + "args": { + "External id": 474741,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109717.649, "dur": 0.050, + "args": { + "External id": 474742,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109718.167, "dur": 0.063, + "args": { + "External id": 474743,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109718.502, "dur": 0.047, + "args": { + "External id": 474744,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109718.907, "dur": 0.054, + "args": { + "External id": 474745,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109719.222, "dur": 0.052, + "args": { + "External id": 474746,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109719.722, "dur": 0.067, + "args": { + "External id": 474747,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109720.042, "dur": 0.048, + "args": { + "External id": 474748,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109720.473, "dur": 0.065, + "args": { + "External id": 474749,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109720.818, "dur": 0.048, + "args": { + "External id": 474750,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109721.242, "dur": 0.056, + "args": { + "External id": 474751,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109721.559, "dur": 0.047, + "args": { + "External id": 474752,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109721.974, "dur": 0.057, + "args": { + "External id": 474753,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109722.293, "dur": 0.048, + "args": { + "External id": 474754,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109722.721, "dur": 0.055, + "args": { + "External id": 474755,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109723.035, "dur": 0.048, + "args": { + "External id": 474756,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109723.593, "dur": 0.046, + "args": { + "External id": 474757,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109723.906, "dur": 0.046, + "args": { + "External id": 474758,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109724.312, "dur": 0.058, + "args": { + "External id": 474759,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109724.631, "dur": 0.048, + "args": { + "External id": 474760,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109725.109, "dur": 0.051, + "args": { + "External id": 474761,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109725.428, "dur": 0.053, + "args": { + "External id": 474762,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109725.862, "dur": 0.067, + "args": { + "External id": 474763,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109726.193, "dur": 0.049, + "args": { + "External id": 474764,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109726.622, "dur": 0.054, + "args": { + "External id": 474765,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109726.939, "dur": 0.050, + "args": { + "External id": 474766,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109727.267, "dur": 0.057, + "args": { + "External id": 474767,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109727.584, "dur": 0.051, + "args": { + "External id": 474768,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109727.922, "dur": 0.055, + "args": { + "External id": 474769,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109728.239, "dur": 0.051, + "args": { + "External id": 474770,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109728.668, "dur": 0.055, + "args": { + "External id": 474771,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109728.982, "dur": 0.048, + "args": { + "External id": 474772,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109729.342, "dur": 0.056, + "args": { + "External id": 474773,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109729.657, "dur": 0.051, + "args": { + "External id": 474774,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109730.116, "dur": 0.069, + "args": { + "External id": 474775,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109730.456, "dur": 0.052, + "args": { + "External id": 474776,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109730.862, "dur": 0.055, + "args": { + "External id": 474777,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109731.175, "dur": 0.048, + "args": { + "External id": 474778,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109731.590, "dur": 0.066, + "args": { + "External id": 474779,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109731.923, "dur": 0.070, + "args": { + "External id": 474780,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109732.507, "dur": 0.057, + "args": { + "External id": 474781,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109732.827, "dur": 0.050, + "args": { + "External id": 474782,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109733.271, "dur": 0.057, + "args": { + "External id": 474783,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109733.660, "dur": 0.063, + "args": { + "External id": 474784,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109734.146, "dur": 0.059, + "args": { + "External id": 474785,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109734.467, "dur": 0.050, + "args": { + "External id": 474786,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109734.885, "dur": 0.065, + "args": { + "External id": 474787,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109735.403, "dur": 0.067, + "args": { + "External id": 474788,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109735.854, "dur": 0.056, + "args": { + "External id": 474789,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109736.172, "dur": 0.051, + "args": { + "External id": 474790,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109736.799, "dur": 0.056, + "args": { + "External id": 474791,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109737.155, "dur": 0.051, + "args": { + "External id": 474792,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109737.573, "dur": 0.059, + "args": { + "External id": 474793,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109737.889, "dur": 0.048, + "args": { + "External id": 474794,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109738.350, "dur": 0.057, + "args": { + "External id": 474795,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109738.674, "dur": 0.279, + "args": { + "External id": 474796,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109739.320, "dur": 0.263, + "args": { + "External id": 474797,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109739.859, "dur": 0.399, + "args": { + "External id": 474798,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109740.636, "dur": 0.296, + "args": { + "External id": 474799,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109741.192, "dur": 0.231, + "args": { + "External id": 474800,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109741.864, "dur": 0.066, + "args": { + "External id": 474801,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109742.189, "dur": 0.065, + "args": { + "External id": 474802,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109742.607, "dur": 0.057, + "args": { + "External id": 474803,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109742.921, "dur": 0.054, + "args": { + "External id": 474804,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109743.371, "dur": 0.055, + "args": { + "External id": 474805,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109743.689, "dur": 0.423, + "args": { + "External id": 474806,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109744.416, "dur": 0.326, + "args": { + "External id": 474807,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109745.014, "dur": 0.304, + "args": { + "External id": 474808,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109745.709, "dur": 0.075, + "args": { + "External id": 474809,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109746.060, "dur": 0.051, + "args": { + "External id": 474810,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109746.508, "dur": 0.068, + "args": { + "External id": 474811,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109746.843, "dur": 0.053, + "args": { + "External id": 474812,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109747.270, "dur": 0.092, + "args": { + "External id": 474813,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109747.621, "dur": 0.248, + "args": { + "External id": 474814,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109748.235, "dur": 0.068, + "args": { + "External id": 474815,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109748.563, "dur": 0.051, + "args": { + "External id": 474816,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109748.991, "dur": 0.058, + "args": { + "External id": 474817,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109749.308, "dur": 0.045, + "args": { + "External id": 474818,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109749.735, "dur": 0.068, + "args": { + "External id": 474819,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109750.056, "dur": 0.051, + "args": { + "External id": 474820,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109750.488, "dur": 0.066, + "args": { + "External id": 474821,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109750.822, "dur": 0.055, + "args": { + "External id": 474822,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109751.234, "dur": 0.068, + "args": { + "External id": 474823,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109751.557, "dur": 0.046, + "args": { + "External id": 474824,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109751.892, "dur": 0.061, + "args": { + "External id": 474825,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109752.214, "dur": 0.047, + "args": { + "External id": 474826,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109752.815, "dur": 0.054, + "args": { + "External id": 474827,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109753.128, "dur": 0.047, + "args": { + "External id": 474828,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109753.659, "dur": 0.058, + "args": { + "External id": 474829,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109753.982, "dur": 0.055, + "args": { + "External id": 474830,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109754.516, "dur": 0.060, + "args": { + "External id": 474831,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109755.027, "dur": 0.053, + "args": { + "External id": 474832,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109755.469, "dur": 0.064, + "args": { + "External id": 474833,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109755.793, "dur": 0.055, + "args": { + "External id": 474834,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109756.305, "dur": 0.065, + "args": { + "External id": 474835,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109756.632, "dur": 0.051, + "args": { + "External id": 474836,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109757.036, "dur": 0.069, + "args": { + "External id": 474837,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109757.369, "dur": 0.054, + "args": { + "External id": 474838,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109757.792, "dur": 0.070, + "args": { + "External id": 474839,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109758.119, "dur": 0.053, + "args": { + "External id": 474840,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109758.534, "dur": 0.060, + "args": { + "External id": 474841,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109758.849, "dur": 0.053, + "args": { + "External id": 474842,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109759.291, "dur": 0.058, + "args": { + "External id": 474843,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109759.613, "dur": 0.047, + "args": { + "External id": 474844,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109760.032, "dur": 0.059, + "args": { + "External id": 474845,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109760.354, "dur": 0.069, + "args": { + "External id": 474846,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109760.804, "dur": 0.057, + "args": { + "External id": 474847,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109761.123, "dur": 0.052, + "args": { + "External id": 474848,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109761.687, "dur": 0.062, + "args": { + "External id": 474849,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109762.002, "dur": 0.050, + "args": { + "External id": 474850,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109762.401, "dur": 0.058, + "args": { + "External id": 474851,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109762.716, "dur": 0.054, + "args": { + "External id": 474852,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109763.171, "dur": 0.065, + "args": { + "External id": 474853,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109763.485, "dur": 0.048, + "args": { + "External id": 474854,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109763.914, "dur": 0.055, + "args": { + "External id": 474855,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109764.223, "dur": 0.051, + "args": { + "External id": 474856,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109764.650, "dur": 0.066, + "args": { + "External id": 474857,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109764.962, "dur": 0.054, + "args": { + "External id": 474858,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109765.465, "dur": 0.054, + "args": { + "External id": 474859,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109765.762, "dur": 0.052, + "args": { + "External id": 474860,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109766.199, "dur": 0.064, + "args": { + "External id": 474861,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109766.539, "dur": 0.052, + "args": { + "External id": 474862,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109766.875, "dur": 0.064, + "args": { + "External id": 474863,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109767.209, "dur": 0.053, + "args": { + "External id": 474864,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109767.636, "dur": 0.066, + "args": { + "External id": 474865,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109767.970, "dur": 0.054, + "args": { + "External id": 474866,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109768.383, "dur": 0.059, + "args": { + "External id": 474867,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109768.705, "dur": 0.051, + "args": { + "External id": 474868,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109769.142, "dur": 0.065, + "args": { + "External id": 474869,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109769.482, "dur": 0.062, + "args": { + "External id": 474870,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109769.947, "dur": 0.069, + "args": { + "External id": 474871,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109770.292, "dur": 0.062, + "args": { + "External id": 474872,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109770.752, "dur": 0.065, + "args": { + "External id": 474873,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109771.082, "dur": 0.060, + "args": { + "External id": 474874,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109771.493, "dur": 0.086, + "args": { + "External id": 474875,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109771.848, "dur": 0.050, + "args": { + "External id": 474876,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109772.403, "dur": 0.061, + "args": { + "External id": 474877,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109772.727, "dur": 0.049, + "args": { + "External id": 474878,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109773.153, "dur": 0.068, + "args": { + "External id": 474879,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109773.486, "dur": 0.053, + "args": { + "External id": 474880,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109773.963, "dur": 0.064, + "args": { + "External id": 474881,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109774.287, "dur": 0.047, + "args": { + "External id": 474882,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109774.734, "dur": 0.267, + "args": { + "External id": 474883,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109775.257, "dur": 0.255, + "args": { + "External id": 474884,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109775.888, "dur": 0.321, + "args": { + "External id": 474885,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109776.462, "dur": 0.229, + "args": { + "External id": 474886,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109776.999, "dur": 0.282, + "args": { + "External id": 474887,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109777.542, "dur": 0.068, + "args": { + "External id": 474888,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109778.009, "dur": 0.067, + "args": { + "External id": 474889,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109778.322, "dur": 0.256, + "args": { + "External id": 474890,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109778.919, "dur": 0.059, + "args": { + "External id": 474891,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109779.252, "dur": 0.048, + "args": { + "External id": 474892,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109779.839, "dur": 0.067, + "args": { + "External id": 474893,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109780.150, "dur": 0.052, + "args": { + "External id": 474894,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109780.641, "dur": 0.057, + "args": { + "External id": 474895,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109780.939, "dur": 0.055, + "args": { + "External id": 474896,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109781.385, "dur": 0.065, + "args": { + "External id": 474897,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109781.694, "dur": 0.053, + "args": { + "External id": 474898,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109782.110, "dur": 0.058, + "args": { + "External id": 474899,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109782.414, "dur": 0.050, + "args": { + "External id": 474900,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109782.891, "dur": 0.070, + "args": { + "External id": 474901,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109783.219, "dur": 0.051, + "args": { + "External id": 474902,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109783.562, "dur": 0.069, + "args": { + "External id": 474903,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109783.886, "dur": 0.054, + "args": { + "External id": 474904,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109784.303, "dur": 0.067, + "args": { + "External id": 474905,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109784.628, "dur": 0.054, + "args": { + "External id": 474906,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109785.191, "dur": 0.065, + "args": { + "External id": 474907,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109785.514, "dur": 0.053, + "args": { + "External id": 474908,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109785.962, "dur": 0.058, + "args": { + "External id": 474909,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109786.277, "dur": 0.053, + "args": { + "External id": 474910,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109786.933, "dur": 0.093, + "args": { + "External id": 474911,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109787.479, "dur": 0.071, + "args": { + "External id": 474912,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109787.897, "dur": 0.061, + "args": { + "External id": 474913,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109788.250, "dur": 0.060, + "args": { + "External id": 474914,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109788.729, "dur": 0.063, + "args": { + "External id": 474915,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109789.050, "dur": 0.061, + "args": { + "External id": 474916,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109789.498, "dur": 0.057, + "args": { + "External id": 474917,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109789.812, "dur": 0.048, + "args": { + "External id": 474918,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109790.166, "dur": 0.056, + "args": { + "External id": 474919,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109790.482, "dur": 0.047, + "args": { + "External id": 474920,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109790.863, "dur": 0.061, + "args": { + "External id": 474921,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109791.183, "dur": 0.047, + "args": { + "External id": 474922,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109791.873, "dur": 0.057, + "args": { + "External id": 474923,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109792.188, "dur": 0.050, + "args": { + "External id": 474924,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109792.712, "dur": 0.051, + "args": { + "External id": 474925,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109793.026, "dur": 0.046, + "args": { + "External id": 474926,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109793.359, "dur": 0.056, + "args": { + "External id": 474927,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109793.676, "dur": 0.051, + "args": { + "External id": 474928,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109794.180, "dur": 0.054, + "args": { + "External id": 474929,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109794.570, "dur": 0.047, + "args": { + "External id": 474930,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109795.055, "dur": 0.089, + "args": { + "External id": 474931,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109795.429, "dur": 0.049, + "args": { + "External id": 474932,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109795.894, "dur": 0.056, + "args": { + "External id": 474933,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109796.216, "dur": 0.051, + "args": { + "External id": 474934,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109796.685, "dur": 0.058, + "args": { + "External id": 474935,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109797.006, "dur": 0.048, + "args": { + "External id": 474936,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109797.318, "dur": 0.081, + "args": { + "External id": 474937,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109797.661, "dur": 0.049, + "args": { + "External id": 474938,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109798.602, "dur": 0.054, + "args": { + "External id": 474939,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109798.944, "dur": 0.050, + "args": { + "External id": 474940,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109799.594, "dur": 0.057, + "args": { + "External id": 474941,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109799.929, "dur": 0.047, + "args": { + "External id": 474942,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109800.663, "dur": 0.287, + "args": { + "External id": 474943,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109801.241, "dur": 0.078, + "args": { + "External id": 474944,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109801.972, "dur": 0.058, + "args": { + "External id": 474945,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109802.299, "dur": 0.063, + "args": { + "External id": 474946,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109803.103, "dur": 0.266, + "args": { + "External id": 474947,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109803.652, "dur": 0.064, + "args": { + "External id": 474948,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109804.196, "dur": 0.243, + "args": { + "External id": 474949,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109804.842, "dur": 0.054, + "args": { + "External id": 474950,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109805.609, "dur": 0.058, + "args": { + "External id": 474951,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109805.952, "dur": 0.304, + "args": { + "External id": 474952,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109806.916, "dur": 0.066, + "args": { + "External id": 474953,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109807.445, "dur": 0.243, + "args": { + "External id": 474954,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109808.421, "dur": 0.082, + "args": { + "External id": 474955,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109808.975, "dur": 0.064, + "args": { + "External id": 474956,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109809.698, "dur": 0.292, + "args": { + "External id": 474957,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109810.459, "dur": 0.088, + "args": { + "External id": 474958,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109811.228, "dur": 0.066, + "args": { + "External id": 474959,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109811.578, "dur": 0.097, + "args": { + "External id": 474960,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109812.212, "dur": 0.054, + "args": { + "External id": 474961,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109812.656, "dur": 0.376, + "args": { + "External id": 474962,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109813.776, "dur": 0.060, + "args": { + "External id": 474963,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109814.104, "dur": 0.051, + "args": { + "External id": 474964,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109814.883, "dur": 0.062, + "args": { + "External id": 474965,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109815.225, "dur": 0.049, + "args": { + "External id": 474966,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109815.818, "dur": 0.050, + "args": { + "External id": 474967,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109816.253, "dur": 0.056, + "args": { + "External id": 474968,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109816.806, "dur": 0.067, + "args": { + "External id": 474969,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109817.233, "dur": 0.058, + "args": { + "External id": 474970,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109817.841, "dur": 0.068, + "args": { + "External id": 474971,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109818.379, "dur": 0.056, + "args": { + "External id": 474972,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109819.556, "dur": 0.053, + "args": { + "External id": 474973,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109819.868, "dur": 0.051, + "args": { + "External id": 474974,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109820.818, "dur": 0.065, + "args": { + "External id": 474975,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109821.142, "dur": 0.050, + "args": { + "External id": 474976,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109821.856, "dur": 0.080, + "args": { + "External id": 474977,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109822.192, "dur": 0.049, + "args": { + "External id": 474978,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109823.064, "dur": 0.068, + "args": { + "External id": 474979,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109823.423, "dur": 0.051, + "args": { + "External id": 474980,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109824.030, "dur": 0.066, + "args": { + "External id": 474981,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338707, "tid": 2338707, + "ts": 6267456109824.543, "dur": 0.072, + "args": { + "External id": 474982,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2338707, "tid": 2338707, + "ts": 6267456110417.637, "dur": 3630.746, + "args": { + "External id": 474983,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "1.7131852859200711e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 21931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2338707, "tid": 2338707, + "ts": 6267456112893.249, "dur": 976.983, + "args": { + "External id": 474984,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "1.7131852859200711e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 21932 + } + }, + { + "name": "process_name", "ph": "M", "ts": 6267449557192.309, "pid": 2338707, "tid": 0, + "args": { + "name": "python3.12" + } + }, + { + "name": "process_labels", "ph": "M", "ts": 6267449557192.309, "pid": 2338707, "tid": 0, + "args": { + "labels": "CPU" + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 6267449557192.309, "pid": 2338707, "tid": 0, + "args": { + "sort_index": 2338707 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6267449557192.309, "pid": 2338707, "tid": 2379456, + "args": { + "name": "thread 2379456 (pt_autograd_1)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6267449557192.309, "pid": 2338707, "tid": 2379456, + "args": { + "sort_index": 2379456 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6267449557192.309, "pid": 2338707, "tid": 2379456, + "args": { + "name": "thread 2379456 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6267449557192.309, "pid": 2338707, "tid": 2379456, + "args": { + "sort_index": 2379456 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6267449557192.309, "pid": 2338707, "tid": 2338707, + "args": { + "name": "thread 2338707 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6267449557192.309, "pid": 2338707, "tid": 2338707, + "args": { + "sort_index": 2338707 + } + }, + { + "ph": "X", "cat": "Trace", "ts": 6267449557121.949, "dur": 6563997.255, + "pid": "Spans", "tid": "PyTorch Profiler", + "name": "PyTorch Profiler (0)", + "args": { + "Op count": 0 + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 6267449557121.949, + "pid": "Spans", "tid": 0, + "args": { + "sort_index": 536870912 + } + }, + { + "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", + "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 6267449557121.949 + }, + { + "name": "Record Window End", "ph": "i", "s": "g", + "pid": "", "tid": "", "ts": 6267456216869.969 + } + ], + "traceName": "exp/mtp.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine/profile_trace/iteration_10752/rank1_trace.json", + "displayTimeUnit": "ms", + "baseTimeNanoseconds": 1751410836000000000 +} \ No newline at end of file