| { |
| "Average": { |
| "tensorizer": { |
| "StaticProfiler::AverageFractalPeUtilization": 99.8321762084961, |
| "StaticProfiler::AveragePartitionUtilization": 99.3888168334961, |
| "StaticProfiler::AveragePeUtilization": 99.65400695800781, |
| "StaticProfiler::LocalizationEfficiency": 109.9806137084961, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 110.06793212890625, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 0 |
| } |
| }, |
| "Count": { |
| "tensorizer": { |
| "StaticProfiler::AverageFractalPeUtilization": 1, |
| "StaticProfiler::AveragePartitionUtilization": 1, |
| "StaticProfiler::AveragePeUtilization": 1, |
| "StaticProfiler::LocalizationEfficiency": 1, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 1, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 1 |
| } |
| }, |
| "Sum": { |
| "compiletime": { |
| "AGOrderingAnalysisPass": 1.4457588195800781, |
| "AffinePredicateResolution": 0.05167531967163086, |
| "AliasDependencyElimination": 0.0026276111602783203, |
| "AliasDependencyInduction": 0.44934630393981934, |
| "AliasDependencyReset": 1.2677826881408691, |
| "BFComputeCutting": 0.06423807144165039, |
| "BirCodeGenLoop": 2.421293258666992, |
| "CCOpFusion": 0.41050028800964355, |
| "CanonicalizeConv": 9.999999974752427e-07, |
| "CanonicalizeDAGForPGTiling": 0.21233797073364258, |
| "CanonicalizeForTensorizer": 0.0003640000068116933, |
| "CanonicalizeIR": 0.06626629829406738, |
| "Canonicalizer": 0.007044999860227108, |
| "CoalesceCCOp": 0.19146490097045898, |
| "CommuteConcat": 0.03319668769836426, |
| "DMALocalityOpt": 0.035207271575927734, |
| "DMAProfiler": 0.08866691589355469, |
| "DMATilingProfiler": 0.07109546661376953, |
| "DataLocalityOpt": 1.910703182220459, |
| "DataStreaming": 0.15389323234558105, |
| "DeConcat": 0.012087583541870117, |
| "DeadCodeElimination": 0.035611867904663086, |
| "DeadStoreElimination": 0.37193870544433594, |
| "DelinearIndices": 0.2894127368927002, |
| "Delinearization": 0.1295926570892334, |
| "DoNothing": 0.00019550323486328125, |
| "DramToDramTranspose": 1.0679569244384766, |
| "DumpGraphAndMetadata": 0.24142217636108398, |
| "EliminateDivs": 0.17337489128112793, |
| "ExpandBatchNorm": 0.06027984619140625, |
| "ExpandISAMacro": 0.0909569263458252, |
| "FactorizeBlkDims": 0.24945974349975586, |
| "FactorizeThreadAxesInFreeDims": 0.03613853454589844, |
| "FlattenMacroLoop": 0.26774168014526367, |
| "GenericAccessSimplifier": 0.03175926208496094, |
| "HoistCompute": 4.8000001697801054e-05, |
| "IdentifyCrossPassTensors": 0.00013600000238511711, |
| "InferInitValue": 1.029360294342041, |
| "InferIntrinsicOnCC": 0.34307408332824707, |
| "InferNeuronTensor": 1.7935998439788818, |
| "InferNonlocalTensors": 3.6307339668273926, |
| "InferPSumTensor": 0.9782986640930176, |
| "InlineNativeKernels": 0.05374264717102051, |
| "InsertIOTransposes": 1.162278652191162, |
| "InsertLocalTransposes": 1.0349645614624023, |
| "InsertOffloadedTransposes": 0.0943443775177002, |
| "LICM": 0.1061861515045166, |
| "LateLegalizeInst": 0.22754216194152832, |
| "LateLegalizePostSplit": 0.09247255325317383, |
| "LateLowerReshapeOp": 0.04053616523742676, |
| "LateLowerTensorOp": 0.3356895446777344, |
| "LateNeuronInstComb": 0.4516925811767578, |
| "LayoutPreprocessing": 0.9441671371459961, |
| "LayoutPreprocessingAndAnalysis": 1.2680203914642334, |
| "LayoutRequirementAnalysis": 0.309098482131958, |
| "LegalizeCCOpLayout": 0.07318258285522461, |
| "LegalizeOpLevelAlias": 0.03343796730041504, |
| "LegalizePartitionReduce": 0.034781694412231445, |
| "LegalizeSundaAccess": 1.4558701515197754, |
| "LegalizeSundaMacro": 0.37755250930786133, |
| "LegalizeType": 0.20858454704284668, |
| "LocalLayoutOpt": 0.36218762397766113, |
| "LoopFusion": 0.31240200996398926, |
| "LoopSplitting": 0.013066768646240234, |
| "LowerBroadcast": 0.047890663146972656, |
| "LowerCCOpBlockAxis": 0.23094987869262695, |
| "LowerComplexBroadcast": 0.15572404861450195, |
| "LowerIntrinsics": 1.228858470916748, |
| "LowerTensorOp": 0.4897449016571045, |
| "LowerTranspose": 0.3995330333709717, |
| "MacroGeneration": 2.335334062576294, |
| "MaskPropagation": 0.14433836936950684, |
| "MemcastMotion": 0.00013000000035390258, |
| "MemcpyElimination": 3.9867260456085205, |
| "MutateDataType": 0.04344511032104492, |
| "NeuronAliasDependencyInduction": 0.025929927825927734, |
| "NeuronAliasDependencyReset": 0.04254412651062012, |
| "NeuronInstComb": 0.19350981712341309, |
| "NeuronLICM": 0.2897522449493408, |
| "NeuronLoopFusion": 0.4089043140411377, |
| "NeuronLoopInterchange": 0.04476189613342285, |
| "NeuronSimplifier": 0.30055856704711914, |
| "NeuronSimplifyPredicates": 0.18221426010131836, |
| "NeuronValueNumbering": 0.10663247108459473, |
| "OptimizeAliasedCopyChain": 0.01511383056640625, |
| "OptimizeNKIKernels": 0.4606451988220215, |
| "PAGLayoutOpt": 26.32272720336914, |
| "PComputeCutting": 0.302201509475708, |
| "PGLayoutTilingPipeline": 38.88710403442383, |
| "PGTiling": 4.423768043518066, |
| "PadElimination": 0.008622884750366211, |
| "ParAxesAnnotation": 25.272018432617188, |
| "PartialLoopFusion": 0.2368309497833252, |
| "PartialSimdFusion": 0.20722246170043945, |
| "PenguinizeFunctions": 0.00015999999595806003, |
| "PerfectLoopNest": 0.06273055076599121, |
| "PruneFunctions": 0.00016700000560376793, |
| "RecognizeOpIdiom": 0.20455479621887207, |
| "Recompute": 0.00649714469909668, |
| "RelaxPredicates": 0.154876708984375, |
| "Rematerialization": 0.16764259338378906, |
| "RemoveOptimizationBarriers": 0.00014099999680183828, |
| "ReshapeWeights": 0.021569013595581055, |
| "ResolveAccessConflict": 0.24012255668640137, |
| "ResolveComplicatePredicates": 0.05034017562866211, |
| "RewriteReplicationMatmul": 0.04589343070983887, |
| "RewriteWeights": 0.05840659141540527, |
| "SFKVectorizer": 3.1227571964263916, |
| "ScatterMotion": 0.0041600000113248825, |
| "SimpleAllReduceTiling": 0.06594347953796387, |
| "Simplifier": 0.11366057395935059, |
| "SimplifyMacroPredicates": 0.18840670585632324, |
| "SimplifyNeuronTensor": 1.3299446105957031, |
| "SimplifySlice": 0.03386688232421875, |
| "SimplifyTensor": 0.21405529975891113, |
| "SpillPSum": 0.5441117286682129, |
| "SplitAPUnionSets": 0.3313255310058594, |
| "SplitAccGrp": 0.03839588165283203, |
| "StaticProfiler": 0.13296246528625488, |
| "StaticTransposeLocalTensor": 0.21724367141723633, |
| "SundaISel": 1.6302134990692139, |
| "TCTransform": 0.03438615798950195, |
| "TensorInitialization": 0.13414645195007324, |
| "TensorOpSimplifier": 0.27712535858154297, |
| "TensorOpTransform": 0.8646912574768066, |
| "TensorizerLegalizationPass": 0.000155999994603917, |
| "TileCCOps": 0.263721227645874, |
| "TilingProfiler": 0.39296984672546387, |
| "TransformConvOp": 0.06336498260498047, |
| "TritiumFusion": 1.0901517868041992, |
| "ValueNumbering": 0.09328150749206543, |
| "VectorizeDMA": 0.03394460678100586, |
| "VectorizeMatMult": 0.0209348201751709, |
| "VerifySupportedOps": 0.00023200000578071922, |
| "WeightCoalescing": 0.05484199523925781, |
| "ZeroSizeTensorElimination": 0.0004336833953857422, |
| "algsimp": 0.0020280000753700733, |
| "batchnorm_expander": 0.0007249999907799065, |
| "boundary-marker-removal": 0.0004140000091865659, |
| "call-inliner": 0.0002570000069681555, |
| "canonicalize-boundary-marker": 0.00044800000614486635, |
| "collective-stream-id-checker": 7.000000186963007e-05, |
| "comparison-expander": 0.00041700000292621553, |
| "computation-deduplicator": 0.0004440000047907233, |
| "conditional-to-select": 8.70000003487803e-05, |
| "config-lowering": 0.00020700000459328294, |
| "constant_folding": 0.00016900000628083944, |
| "cse": 0.00043799998820759356, |
| "dce": 3.899999865097925e-05, |
| "dynamic-slice-transpose": 0.00015799999528098851, |
| "eliminate-redundant-compare": 0.0001539999939268455, |
| "emit-offloaded-dropout": 0.0002770000137388706, |
| "flatten-call-graph": 0.000299000006634742, |
| "fuse-send-recv": 0.0015030000358819962, |
| "hilo::LegalizeAlias": 0.003281000070273876, |
| "hilo::NeuronInstCombine": 0.0011020000092685223, |
| "hilo::NeuronOpFusion": 0.0003429999924264848, |
| "hilo::ReplaceTokenTypeWithU8Pass": 0.00018600000475998968, |
| "hilo::ScheduleFusion": 3.5000000934815034e-05, |
| "hilo::SixtyFourHack": 0.00020599999697878957, |
| "hilo::VerifyAliasing": 7.000000186963007e-05, |
| "hlo-mac-count": 0.0006559999892488122, |
| "hlo-verifier": 0.006031000055372715, |
| "io-con-pipe-begin": 4.999999873689376e-06, |
| "io-con-pipe-end": 9.999999974752427e-07, |
| "io-layout-normalization": 0.0009500000160187483, |
| "legalize-ccops": 1.700000029813964e-05, |
| "legalize-compare": 0.00036899998667649925, |
| "lower-argminmax-custom-call": 0.00013800000306218863, |
| "map-inline": 0.0006319999811239541, |
| "metadata-naming": 0.0009749999735504389, |
| "mlir::detail::OpToOpPassAdaptor": 0.00022499999613501132, |
| "mlir::hlo::MhloToPyPenguin": 0.025104999542236328, |
| "mlir::mhlo::LowerComplexExtraPass": 0.002770999912172556, |
| "mlir::mhlo::LowerComplexPass": 0.001180000021122396, |
| "native-to-custom-softmax": 0.00041199999395757914, |
| "native-to-custom-softmax-dx": 0.00042600001324899495, |
| "operand_upcaster": 0.0007089999853633344, |
| "post-par-pipe-begin": 9.999999974752427e-07, |
| "post-par-pipe-end": 0.0, |
| "post-partition-simplification": 0.05639899894595146, |
| "pre-hlo-begin": 4.999999873689376e-06, |
| "pre-hlo-end": 9.999999974752427e-07, |
| "replace-minimum-constant": 0.0002209999947808683, |
| "reshape-mover": 7.400000322377309e-05, |
| "simplify-concat": 0.0018210000125691295, |
| "simplify-while-loops": 5.500000042957254e-05, |
| "transform-variadic-reduce": 0.0006440000142902136, |
| "tuple-simplifier": 0.00016700000560376793, |
| "unpack-nested-aws-ntwsr": 0.00035700001171790063, |
| "unroll-while-loop": 1.1000000085914508e-05 |
| }, |
| "hilo": { |
| "HloMacCount": 3802996736.0, |
| "Traffic": 8267154432.0 |
| }, |
| "tensorizer": { |
| "DMATilingProfiler::TotalInstructionsAfterTiling": 262321, |
| "StaticProfiler::AifUb": 10.559271812438965, |
| "StaticProfiler::ArithmeticIntensityTensorizer": 11.613152503967285, |
| "StaticProfiler::AverageDmaLength": 6652.8759765625, |
| "StaticProfiler::DDRTransferBytes": 7587185496, |
| "StaticProfiler::InternalTransferBytes": 632323092, |
| "StaticProfiler::LoadExpanded": 1033407, |
| "StaticProfiler::StoreExpanded": 3422, |
| "StaticProfiler::TotalDMAExpanded": 1036829, |
| "StaticProfiler::TotalDynamicInstancesCount": 275548, |
| "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 274994, |
| "StaticProfiler::TotalLNCComm": 0, |
| "StaticProfiler::TotalLNCCommTransfer": 0, |
| "TilingProfiler::BatchnormInstructionsAfterTiling": 0, |
| "TilingProfiler::DmaInstructionsAfterTiling": 0, |
| "TilingProfiler::GenericInstructionsAfterTiling": 79, |
| "TilingProfiler::MatMultInstructionsAfterTiling": 231408, |
| "TilingProfiler::NumPfTransposes": 398, |
| "TilingProfiler::NumPfTransposesForIo": 37, |
| "TilingProfiler::NumPfTransposesForLocal": 216, |
| "TilingProfiler::NumPfTransposesForNonlocal": 145, |
| "TilingProfiler::PfTransposeInstructions": 19513, |
| "TilingProfiler::PfTransposeInstructionsForIo": 19152, |
| "TilingProfiler::PfTransposeInstructionsForLocal": 216, |
| "TilingProfiler::PfTransposeInstructionsForNonlocal": 145, |
| "TilingProfiler::ReduceInstructionsAfterTiling": 74, |
| "TilingProfiler::SimdInstructionsAfterTiling": 2999, |
| "TilingProfiler::TotalInstructionsAfterTiling": 0, |
| "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0, |
| "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0, |
| "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0, |
| "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0, |
| "TransformConvOp::conv2d_column_packing": 0, |
| "TransformConvOp::conv2d_column_packing_1": 0, |
| "TransformConvOp::conv2d_column_packing_io10": 0, |
| "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0 |
| } |
| }, |
| "all": { |
| "compiletime": { |
| "CanonicalizeConv": 9.999999974752427e-07, |
| "CanonicalizeForTensorizer": 0.0003640000068116933, |
| "Canonicalizer": 0.007044999860227108, |
| "HoistCompute": 4.8000001697801054e-05, |
| "IdentifyCrossPassTensors": 0.00013600000238511711, |
| "MemcastMotion": 0.00013000000035390258, |
| "PenguinizeFunctions": 0.00015999999595806003, |
| "PruneFunctions": 0.00016700000560376793, |
| "RemoveOptimizationBarriers": 0.00014099999680183828, |
| "ScatterMotion": 0.0041600000113248825, |
| "TensorizerLegalizationPass": 0.000155999994603917, |
| "VerifySupportedOps": 0.00023200000578071922, |
| "algsimp": 0.0020280000753700733, |
| "batchnorm_expander": 0.0007249999907799065, |
| "boundary-marker-removal": 0.0004140000091865659, |
| "call-inliner": 0.0002570000069681555, |
| "canonicalize-boundary-marker": 0.00044800000614486635, |
| "collective-stream-id-checker": 7.000000186963007e-05, |
| "comparison-expander": 0.00041700000292621553, |
| "computation-deduplicator": 0.0004440000047907233, |
| "conditional-to-select": 8.70000003487803e-05, |
| "config-lowering": 0.00020700000459328294, |
| "constant_folding": 0.00016900000628083944, |
| "cse": 0.00043799998820759356, |
| "dce": 3.899999865097925e-05, |
| "dynamic-slice-transpose": 0.00015799999528098851, |
| "eliminate-redundant-compare": 0.0001539999939268455, |
| "emit-offloaded-dropout": 0.0002770000137388706, |
| "flatten-call-graph": 0.000299000006634742, |
| "fuse-send-recv": 0.0015030000358819962, |
| "hilo::LegalizeAlias": 0.003281000070273876, |
| "hilo::NeuronInstCombine": 0.0011020000092685223, |
| "hilo::NeuronOpFusion": 0.0003429999924264848, |
| "hilo::ReplaceTokenTypeWithU8Pass": 0.00018600000475998968, |
| "hilo::ScheduleFusion": 3.5000000934815034e-05, |
| "hilo::SixtyFourHack": 0.00020599999697878957, |
| "hilo::VerifyAliasing": 7.000000186963007e-05, |
| "hlo-mac-count": 0.0006559999892488122, |
| "hlo-verifier": 0.006031000055372715, |
| "io-con-pipe-begin": 4.999999873689376e-06, |
| "io-con-pipe-end": 9.999999974752427e-07, |
| "io-layout-normalization": 0.0009500000160187483, |
| "legalize-ccops": 1.700000029813964e-05, |
| "legalize-compare": 0.00036899998667649925, |
| "lower-argminmax-custom-call": 0.00013800000306218863, |
| "map-inline": 0.0006319999811239541, |
| "metadata-naming": 0.0009749999735504389, |
| "mlir::detail::OpToOpPassAdaptor": 0.00022499999613501132, |
| "mlir::hlo::MhloToPyPenguin": 0.025104999542236328, |
| "mlir::mhlo::LowerComplexExtraPass": 0.002770999912172556, |
| "mlir::mhlo::LowerComplexPass": 0.001180000021122396, |
| "native-to-custom-softmax": 0.00041199999395757914, |
| "native-to-custom-softmax-dx": 0.00042600001324899495, |
| "operand_upcaster": 0.0007089999853633344, |
| "post-par-pipe-begin": 9.999999974752427e-07, |
| "post-par-pipe-end": 0.0, |
| "post-partition-simplification": 0.05639899894595146, |
| "pre-hlo-begin": 4.999999873689376e-06, |
| "pre-hlo-end": 9.999999974752427e-07, |
| "replace-minimum-constant": 0.0002209999947808683, |
| "reshape-mover": 7.400000322377309e-05, |
| "simplify-concat": 0.0018210000125691295, |
| "simplify-while-loops": 5.500000042957254e-05, |
| "transform-variadic-reduce": 0.0006440000142902136, |
| "tuple-simplifier": 0.00016700000560376793, |
| "unpack-nested-aws-ntwsr": 0.00035700001171790063, |
| "unroll-while-loop": 1.1000000085914508e-05 |
| } |
| }, |
| "cumsum": { |
| "compiletime": { |
| "CoalesceCCOp": 0.0008378028869628906, |
| "DMALocalityOpt": 0.0003306865692138672, |
| "DMAProfiler": 0.0007596015930175781, |
| "DataStreaming": 0.0002918243408203125, |
| "DoNothing": 0.00012636184692382813, |
| "ExpandISAMacro": 0.0005497932434082031, |
| "FactorizeBlkDims": 0.0004723072052001953, |
| "InferPSumTensor": 0.000583648681640625, |
| "LateLegalizeInst": 0.00040459632873535156, |
| "LateNeuronInstComb": 0.0004837512969970703, |
| "LegalizeSundaAccess": 0.0015611648559570313, |
| "LegalizeType": 0.00025010108947753906, |
| "LowerBroadcast": 0.0009808540344238281, |
| "LowerIntrinsics": 0.0002262592315673828, |
| "LowerTranspose": 0.00021767616271972656, |
| "NeuronInstComb": 0.0004963874816894531, |
| "NeuronLICM": 0.0006859302520751953, |
| "NeuronSimplifyPredicates": 0.002815723419189453, |
| "NeuronValueNumbering": 0.0004124641418457031, |
| "SFKVectorizer": 0.0027742385864257813, |
| "SimpleAllReduceTiling": 0.000209808349609375, |
| "SimplifyNeuronTensor": 0.00040721893310546875, |
| "SpillPSum": 0.0009286403656005859, |
| "WeightCoalescing": 0.0002105236053466797 |
| } |
| }, |
| "sg00": { |
| "hilo": { |
| "ArithmeticIntensity": 0.9200255870819092, |
| "HloMacCount": 3802996736.0, |
| "Traffic": 8267154432.0 |
| } |
| }, |
| "sg0000": { |
| "compiletime": { |
| "AGOrderingAnalysisPass": 1.4457588195800781, |
| "AffinePredicateResolution": 0.05167531967163086, |
| "AliasDependencyElimination": 0.0026276111602783203, |
| "AliasDependencyInduction": 0.44934630393981934, |
| "AliasDependencyReset": 1.2677826881408691, |
| "BFComputeCutting": 0.06423807144165039, |
| "BirCodeGenLoop": 2.421293258666992, |
| "CCOpFusion": 0.41050028800964355, |
| "CanonicalizeDAGForPGTiling": 0.21233797073364258, |
| "CanonicalizeIR": 0.06626629829406738, |
| "CoalesceCCOp": 0.1906270980834961, |
| "CommuteConcat": 0.03319668769836426, |
| "DMALocalityOpt": 0.03487658500671387, |
| "DMAProfiler": 0.08790731430053711, |
| "DMATilingProfiler": 0.07109546661376953, |
| "DataLocalityOpt": 1.910703182220459, |
| "DataStreaming": 0.15360140800476074, |
| "DeConcat": 0.012087583541870117, |
| "DeadCodeElimination": 0.035611867904663086, |
| "DeadStoreElimination": 0.37193870544433594, |
| "DelinearIndices": 0.2894127368927002, |
| "Delinearization": 0.1295926570892334, |
| "DoNothing": 6.914138793945313e-05, |
| "DramToDramTranspose": 1.0679569244384766, |
| "DumpGraphAndMetadata": 0.24142217636108398, |
| "EliminateDivs": 0.17337489128112793, |
| "ExpandBatchNorm": 0.06027984619140625, |
| "ExpandISAMacro": 0.09040713310241699, |
| "FactorizeBlkDims": 0.24898743629455566, |
| "FactorizeThreadAxesInFreeDims": 0.03613853454589844, |
| "FlattenMacroLoop": 0.26774168014526367, |
| "GenericAccessSimplifier": 0.03175926208496094, |
| "InferInitValue": 1.029360294342041, |
| "InferIntrinsicOnCC": 0.34307408332824707, |
| "InferNeuronTensor": 1.7935998439788818, |
| "InferNonlocalTensors": 3.6307339668273926, |
| "InferPSumTensor": 0.977715015411377, |
| "InlineNativeKernels": 0.05374264717102051, |
| "InsertIOTransposes": 1.162278652191162, |
| "InsertLocalTransposes": 1.0349645614624023, |
| "InsertOffloadedTransposes": 0.0943443775177002, |
| "LICM": 0.1061861515045166, |
| "LateLegalizeInst": 0.22713756561279297, |
| "LateLegalizePostSplit": 0.09247255325317383, |
| "LateLowerReshapeOp": 0.04053616523742676, |
| "LateLowerTensorOp": 0.3356895446777344, |
| "LateNeuronInstComb": 0.45120882987976074, |
| "LayoutPreprocessing": 0.9441671371459961, |
| "LayoutPreprocessingAndAnalysis": 1.2680203914642334, |
| "LayoutRequirementAnalysis": 0.309098482131958, |
| "LegalizeCCOpLayout": 0.07318258285522461, |
| "LegalizeOpLevelAlias": 0.03343796730041504, |
| "LegalizePartitionReduce": 0.034781694412231445, |
| "LegalizeSundaAccess": 1.4543089866638184, |
| "LegalizeSundaMacro": 0.37755250930786133, |
| "LegalizeType": 0.20833444595336914, |
| "LocalLayoutOpt": 0.36218762397766113, |
| "LoopFusion": 0.31240200996398926, |
| "LoopSplitting": 0.013066768646240234, |
| "LowerBroadcast": 0.04690980911254883, |
| "LowerCCOpBlockAxis": 0.23094987869262695, |
| "LowerComplexBroadcast": 0.15572404861450195, |
| "LowerIntrinsics": 1.2286322116851807, |
| "LowerTensorOp": 0.4897449016571045, |
| "LowerTranspose": 0.39931535720825195, |
| "MacroGeneration": 2.335334062576294, |
| "MaskPropagation": 0.14433836936950684, |
| "MemcpyElimination": 3.9867260456085205, |
| "MutateDataType": 0.04344511032104492, |
| "NeuronAliasDependencyInduction": 0.025929927825927734, |
| "NeuronAliasDependencyReset": 0.04254412651062012, |
| "NeuronInstComb": 0.19301342964172363, |
| "NeuronLICM": 0.2890663146972656, |
| "NeuronLoopFusion": 0.4089043140411377, |
| "NeuronLoopInterchange": 0.04476189613342285, |
| "NeuronSimplifier": 0.30055856704711914, |
| "NeuronSimplifyPredicates": 0.1793985366821289, |
| "NeuronValueNumbering": 0.10622000694274902, |
| "OptimizeAliasedCopyChain": 0.01511383056640625, |
| "OptimizeNKIKernels": 0.4606451988220215, |
| "PAGLayoutOpt": 26.32272720336914, |
| "PComputeCutting": 0.302201509475708, |
| "PGLayoutTilingPipeline": 38.88710403442383, |
| "PGTiling": 4.423768043518066, |
| "PadElimination": 0.008622884750366211, |
| "ParAxesAnnotation": 25.272018432617188, |
| "PartialLoopFusion": 0.2368309497833252, |
| "PartialSimdFusion": 0.20722246170043945, |
| "PerfectLoopNest": 0.06273055076599121, |
| "RecognizeOpIdiom": 0.20455479621887207, |
| "Recompute": 0.00649714469909668, |
| "RelaxPredicates": 0.154876708984375, |
| "Rematerialization": 0.16764259338378906, |
| "ReshapeWeights": 0.021569013595581055, |
| "ResolveAccessConflict": 0.24012255668640137, |
| "ResolveComplicatePredicates": 0.05034017562866211, |
| "RewriteReplicationMatmul": 0.04589343070983887, |
| "RewriteWeights": 0.05840659141540527, |
| "SFKVectorizer": 3.119982957839966, |
| "SimpleAllReduceTiling": 0.06573367118835449, |
| "Simplifier": 0.11366057395935059, |
| "SimplifyMacroPredicates": 0.18840670585632324, |
| "SimplifyNeuronTensor": 1.3295373916625977, |
| "SimplifySlice": 0.03386688232421875, |
| "SimplifyTensor": 0.21405529975891113, |
| "SpillPSum": 0.5431830883026123, |
| "SplitAPUnionSets": 0.3313255310058594, |
| "SplitAccGrp": 0.03839588165283203, |
| "StaticProfiler": 0.13296246528625488, |
| "StaticTransposeLocalTensor": 0.21724367141723633, |
| "SundaISel": 1.6302134990692139, |
| "TCTransform": 0.03438615798950195, |
| "TensorInitialization": 0.13414645195007324, |
| "TensorOpSimplifier": 0.27712535858154297, |
| "TensorOpTransform": 0.8646912574768066, |
| "TileCCOps": 0.263721227645874, |
| "TilingProfiler": 0.39296984672546387, |
| "TransformConvOp": 0.06336498260498047, |
| "TritiumFusion": 1.0901517868041992, |
| "ValueNumbering": 0.09328150749206543, |
| "VectorizeDMA": 0.03394460678100586, |
| "VectorizeMatMult": 0.0209348201751709, |
| "WeightCoalescing": 0.05463147163391113, |
| "ZeroSizeTensorElimination": 0.0004336833953857422 |
| }, |
| "tensorizer": { |
| "DMATilingProfiler::TotalInstructionsAfterTiling": 262321, |
| "StaticProfiler::AifUb": 10.559271812438965, |
| "StaticProfiler::ArithmeticIntensityTensorizer": 11.613152503967285, |
| "StaticProfiler::AverageDmaLength": 6652.8759765625, |
| "StaticProfiler::AverageFractalPeUtilization": 99.8321762084961, |
| "StaticProfiler::AveragePartitionUtilization": 99.3888168334961, |
| "StaticProfiler::AveragePeUtilization": 99.65400695800781, |
| "StaticProfiler::DDRTransferBytes": 7587185496, |
| "StaticProfiler::InternalTransferBytes": 632323092, |
| "StaticProfiler::LoadExpanded": 1033407, |
| "StaticProfiler::LocalizationEfficiency": 109.9806137084961, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 110.06793212890625, |
| "StaticProfiler::StoreExpanded": 3422, |
| "StaticProfiler::TotalDMAExpanded": 1036829, |
| "StaticProfiler::TotalDynamicInstancesCount": 275548, |
| "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 274994, |
| "StaticProfiler::TotalLNCComm": 0, |
| "StaticProfiler::TotalLNCCommTransfer": 0, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 0, |
| "TilingProfiler::BatchnormInstructionsAfterTiling": 0, |
| "TilingProfiler::DmaInstructionsAfterTiling": 0, |
| "TilingProfiler::GenericInstructionsAfterTiling": 79, |
| "TilingProfiler::MatMultInstructionsAfterTiling": 231408, |
| "TilingProfiler::NumPfTransposes": 398, |
| "TilingProfiler::NumPfTransposesForIo": 37, |
| "TilingProfiler::NumPfTransposesForLocal": 216, |
| "TilingProfiler::NumPfTransposesForNonlocal": 145, |
| "TilingProfiler::PfTransposeInstructions": 19513, |
| "TilingProfiler::PfTransposeInstructionsForIo": 19152, |
| "TilingProfiler::PfTransposeInstructionsForLocal": 216, |
| "TilingProfiler::PfTransposeInstructionsForNonlocal": 145, |
| "TilingProfiler::ReduceInstructionsAfterTiling": 74, |
| "TilingProfiler::SimdInstructionsAfterTiling": 2999, |
| "TilingProfiler::TotalInstructionsAfterTiling": 0, |
| "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0, |
| "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0, |
| "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0, |
| "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0, |
| "TransformConvOp::conv2d_column_packing": 0, |
| "TransformConvOp::conv2d_column_packing_1": 0, |
| "TransformConvOp::conv2d_column_packing_io10": 0, |
| "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0 |
| } |
| } |
| } |