Qwen3-8BSharded / token_generation_model /_tp0_bk0 /global_metric_store.json
jburtoft's picture
Upload folder using huggingface_hub
ee61cf7 verified
{
"Average": {
"tensorizer": {
"StaticProfiler::AverageFractalPeUtilization": 99.8321762084961,
"StaticProfiler::AveragePartitionUtilization": 99.3888168334961,
"StaticProfiler::AveragePeUtilization": 99.65400695800781,
"StaticProfiler::LocalizationEfficiency": 109.9806137084961,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 110.06793212890625,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0,
"TilingProfiler::AveragePeUtilizationAfterTiling": 0
}
},
"Count": {
"tensorizer": {
"StaticProfiler::AverageFractalPeUtilization": 1,
"StaticProfiler::AveragePartitionUtilization": 1,
"StaticProfiler::AveragePeUtilization": 1,
"StaticProfiler::LocalizationEfficiency": 1,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 1,
"TilingProfiler::AveragePeUtilizationAfterTiling": 1
}
},
"Sum": {
"compiletime": {
"AGOrderingAnalysisPass": 1.4457588195800781,
"AffinePredicateResolution": 0.05167531967163086,
"AliasDependencyElimination": 0.0026276111602783203,
"AliasDependencyInduction": 0.44934630393981934,
"AliasDependencyReset": 1.2677826881408691,
"BFComputeCutting": 0.06423807144165039,
"BirCodeGenLoop": 2.421293258666992,
"CCOpFusion": 0.41050028800964355,
"CanonicalizeConv": 9.999999974752427e-07,
"CanonicalizeDAGForPGTiling": 0.21233797073364258,
"CanonicalizeForTensorizer": 0.0003640000068116933,
"CanonicalizeIR": 0.06626629829406738,
"Canonicalizer": 0.007044999860227108,
"CoalesceCCOp": 0.19146490097045898,
"CommuteConcat": 0.03319668769836426,
"DMALocalityOpt": 0.035207271575927734,
"DMAProfiler": 0.08866691589355469,
"DMATilingProfiler": 0.07109546661376953,
"DataLocalityOpt": 1.910703182220459,
"DataStreaming": 0.15389323234558105,
"DeConcat": 0.012087583541870117,
"DeadCodeElimination": 0.035611867904663086,
"DeadStoreElimination": 0.37193870544433594,
"DelinearIndices": 0.2894127368927002,
"Delinearization": 0.1295926570892334,
"DoNothing": 0.00019550323486328125,
"DramToDramTranspose": 1.0679569244384766,
"DumpGraphAndMetadata": 0.24142217636108398,
"EliminateDivs": 0.17337489128112793,
"ExpandBatchNorm": 0.06027984619140625,
"ExpandISAMacro": 0.0909569263458252,
"FactorizeBlkDims": 0.24945974349975586,
"FactorizeThreadAxesInFreeDims": 0.03613853454589844,
"FlattenMacroLoop": 0.26774168014526367,
"GenericAccessSimplifier": 0.03175926208496094,
"HoistCompute": 4.8000001697801054e-05,
"IdentifyCrossPassTensors": 0.00013600000238511711,
"InferInitValue": 1.029360294342041,
"InferIntrinsicOnCC": 0.34307408332824707,
"InferNeuronTensor": 1.7935998439788818,
"InferNonlocalTensors": 3.6307339668273926,
"InferPSumTensor": 0.9782986640930176,
"InlineNativeKernels": 0.05374264717102051,
"InsertIOTransposes": 1.162278652191162,
"InsertLocalTransposes": 1.0349645614624023,
"InsertOffloadedTransposes": 0.0943443775177002,
"LICM": 0.1061861515045166,
"LateLegalizeInst": 0.22754216194152832,
"LateLegalizePostSplit": 0.09247255325317383,
"LateLowerReshapeOp": 0.04053616523742676,
"LateLowerTensorOp": 0.3356895446777344,
"LateNeuronInstComb": 0.4516925811767578,
"LayoutPreprocessing": 0.9441671371459961,
"LayoutPreprocessingAndAnalysis": 1.2680203914642334,
"LayoutRequirementAnalysis": 0.309098482131958,
"LegalizeCCOpLayout": 0.07318258285522461,
"LegalizeOpLevelAlias": 0.03343796730041504,
"LegalizePartitionReduce": 0.034781694412231445,
"LegalizeSundaAccess": 1.4558701515197754,
"LegalizeSundaMacro": 0.37755250930786133,
"LegalizeType": 0.20858454704284668,
"LocalLayoutOpt": 0.36218762397766113,
"LoopFusion": 0.31240200996398926,
"LoopSplitting": 0.013066768646240234,
"LowerBroadcast": 0.047890663146972656,
"LowerCCOpBlockAxis": 0.23094987869262695,
"LowerComplexBroadcast": 0.15572404861450195,
"LowerIntrinsics": 1.228858470916748,
"LowerTensorOp": 0.4897449016571045,
"LowerTranspose": 0.3995330333709717,
"MacroGeneration": 2.335334062576294,
"MaskPropagation": 0.14433836936950684,
"MemcastMotion": 0.00013000000035390258,
"MemcpyElimination": 3.9867260456085205,
"MutateDataType": 0.04344511032104492,
"NeuronAliasDependencyInduction": 0.025929927825927734,
"NeuronAliasDependencyReset": 0.04254412651062012,
"NeuronInstComb": 0.19350981712341309,
"NeuronLICM": 0.2897522449493408,
"NeuronLoopFusion": 0.4089043140411377,
"NeuronLoopInterchange": 0.04476189613342285,
"NeuronSimplifier": 0.30055856704711914,
"NeuronSimplifyPredicates": 0.18221426010131836,
"NeuronValueNumbering": 0.10663247108459473,
"OptimizeAliasedCopyChain": 0.01511383056640625,
"OptimizeNKIKernels": 0.4606451988220215,
"PAGLayoutOpt": 26.32272720336914,
"PComputeCutting": 0.302201509475708,
"PGLayoutTilingPipeline": 38.88710403442383,
"PGTiling": 4.423768043518066,
"PadElimination": 0.008622884750366211,
"ParAxesAnnotation": 25.272018432617188,
"PartialLoopFusion": 0.2368309497833252,
"PartialSimdFusion": 0.20722246170043945,
"PenguinizeFunctions": 0.00015999999595806003,
"PerfectLoopNest": 0.06273055076599121,
"PruneFunctions": 0.00016700000560376793,
"RecognizeOpIdiom": 0.20455479621887207,
"Recompute": 0.00649714469909668,
"RelaxPredicates": 0.154876708984375,
"Rematerialization": 0.16764259338378906,
"RemoveOptimizationBarriers": 0.00014099999680183828,
"ReshapeWeights": 0.021569013595581055,
"ResolveAccessConflict": 0.24012255668640137,
"ResolveComplicatePredicates": 0.05034017562866211,
"RewriteReplicationMatmul": 0.04589343070983887,
"RewriteWeights": 0.05840659141540527,
"SFKVectorizer": 3.1227571964263916,
"ScatterMotion": 0.0041600000113248825,
"SimpleAllReduceTiling": 0.06594347953796387,
"Simplifier": 0.11366057395935059,
"SimplifyMacroPredicates": 0.18840670585632324,
"SimplifyNeuronTensor": 1.3299446105957031,
"SimplifySlice": 0.03386688232421875,
"SimplifyTensor": 0.21405529975891113,
"SpillPSum": 0.5441117286682129,
"SplitAPUnionSets": 0.3313255310058594,
"SplitAccGrp": 0.03839588165283203,
"StaticProfiler": 0.13296246528625488,
"StaticTransposeLocalTensor": 0.21724367141723633,
"SundaISel": 1.6302134990692139,
"TCTransform": 0.03438615798950195,
"TensorInitialization": 0.13414645195007324,
"TensorOpSimplifier": 0.27712535858154297,
"TensorOpTransform": 0.8646912574768066,
"TensorizerLegalizationPass": 0.000155999994603917,
"TileCCOps": 0.263721227645874,
"TilingProfiler": 0.39296984672546387,
"TransformConvOp": 0.06336498260498047,
"TritiumFusion": 1.0901517868041992,
"ValueNumbering": 0.09328150749206543,
"VectorizeDMA": 0.03394460678100586,
"VectorizeMatMult": 0.0209348201751709,
"VerifySupportedOps": 0.00023200000578071922,
"WeightCoalescing": 0.05484199523925781,
"ZeroSizeTensorElimination": 0.0004336833953857422,
"algsimp": 0.0020280000753700733,
"batchnorm_expander": 0.0007249999907799065,
"boundary-marker-removal": 0.0004140000091865659,
"call-inliner": 0.0002570000069681555,
"canonicalize-boundary-marker": 0.00044800000614486635,
"collective-stream-id-checker": 7.000000186963007e-05,
"comparison-expander": 0.00041700000292621553,
"computation-deduplicator": 0.0004440000047907233,
"conditional-to-select": 8.70000003487803e-05,
"config-lowering": 0.00020700000459328294,
"constant_folding": 0.00016900000628083944,
"cse": 0.00043799998820759356,
"dce": 3.899999865097925e-05,
"dynamic-slice-transpose": 0.00015799999528098851,
"eliminate-redundant-compare": 0.0001539999939268455,
"emit-offloaded-dropout": 0.0002770000137388706,
"flatten-call-graph": 0.000299000006634742,
"fuse-send-recv": 0.0015030000358819962,
"hilo::LegalizeAlias": 0.003281000070273876,
"hilo::NeuronInstCombine": 0.0011020000092685223,
"hilo::NeuronOpFusion": 0.0003429999924264848,
"hilo::ReplaceTokenTypeWithU8Pass": 0.00018600000475998968,
"hilo::ScheduleFusion": 3.5000000934815034e-05,
"hilo::SixtyFourHack": 0.00020599999697878957,
"hilo::VerifyAliasing": 7.000000186963007e-05,
"hlo-mac-count": 0.0006559999892488122,
"hlo-verifier": 0.006031000055372715,
"io-con-pipe-begin": 4.999999873689376e-06,
"io-con-pipe-end": 9.999999974752427e-07,
"io-layout-normalization": 0.0009500000160187483,
"legalize-ccops": 1.700000029813964e-05,
"legalize-compare": 0.00036899998667649925,
"lower-argminmax-custom-call": 0.00013800000306218863,
"map-inline": 0.0006319999811239541,
"metadata-naming": 0.0009749999735504389,
"mlir::detail::OpToOpPassAdaptor": 0.00022499999613501132,
"mlir::hlo::MhloToPyPenguin": 0.025104999542236328,
"mlir::mhlo::LowerComplexExtraPass": 0.002770999912172556,
"mlir::mhlo::LowerComplexPass": 0.001180000021122396,
"native-to-custom-softmax": 0.00041199999395757914,
"native-to-custom-softmax-dx": 0.00042600001324899495,
"operand_upcaster": 0.0007089999853633344,
"post-par-pipe-begin": 9.999999974752427e-07,
"post-par-pipe-end": 0.0,
"post-partition-simplification": 0.05639899894595146,
"pre-hlo-begin": 4.999999873689376e-06,
"pre-hlo-end": 9.999999974752427e-07,
"replace-minimum-constant": 0.0002209999947808683,
"reshape-mover": 7.400000322377309e-05,
"simplify-concat": 0.0018210000125691295,
"simplify-while-loops": 5.500000042957254e-05,
"transform-variadic-reduce": 0.0006440000142902136,
"tuple-simplifier": 0.00016700000560376793,
"unpack-nested-aws-ntwsr": 0.00035700001171790063,
"unroll-while-loop": 1.1000000085914508e-05
},
"hilo": {
"HloMacCount": 3802996736.0,
"Traffic": 8267154432.0
},
"tensorizer": {
"DMATilingProfiler::TotalInstructionsAfterTiling": 262321,
"StaticProfiler::AifUb": 10.559271812438965,
"StaticProfiler::ArithmeticIntensityTensorizer": 11.613152503967285,
"StaticProfiler::AverageDmaLength": 6652.8759765625,
"StaticProfiler::DDRTransferBytes": 7587185496,
"StaticProfiler::InternalTransferBytes": 632323092,
"StaticProfiler::LoadExpanded": 1033407,
"StaticProfiler::StoreExpanded": 3422,
"StaticProfiler::TotalDMAExpanded": 1036829,
"StaticProfiler::TotalDynamicInstancesCount": 275548,
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 274994,
"StaticProfiler::TotalLNCComm": 0,
"StaticProfiler::TotalLNCCommTransfer": 0,
"TilingProfiler::BatchnormInstructionsAfterTiling": 0,
"TilingProfiler::DmaInstructionsAfterTiling": 0,
"TilingProfiler::GenericInstructionsAfterTiling": 79,
"TilingProfiler::MatMultInstructionsAfterTiling": 231408,
"TilingProfiler::NumPfTransposes": 398,
"TilingProfiler::NumPfTransposesForIo": 37,
"TilingProfiler::NumPfTransposesForLocal": 216,
"TilingProfiler::NumPfTransposesForNonlocal": 145,
"TilingProfiler::PfTransposeInstructions": 19513,
"TilingProfiler::PfTransposeInstructionsForIo": 19152,
"TilingProfiler::PfTransposeInstructionsForLocal": 216,
"TilingProfiler::PfTransposeInstructionsForNonlocal": 145,
"TilingProfiler::ReduceInstructionsAfterTiling": 74,
"TilingProfiler::SimdInstructionsAfterTiling": 2999,
"TilingProfiler::TotalInstructionsAfterTiling": 0,
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0,
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0,
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0,
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0,
"TransformConvOp::conv2d_column_packing": 0,
"TransformConvOp::conv2d_column_packing_1": 0,
"TransformConvOp::conv2d_column_packing_io10": 0,
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0
}
},
"all": {
"compiletime": {
"CanonicalizeConv": 9.999999974752427e-07,
"CanonicalizeForTensorizer": 0.0003640000068116933,
"Canonicalizer": 0.007044999860227108,
"HoistCompute": 4.8000001697801054e-05,
"IdentifyCrossPassTensors": 0.00013600000238511711,
"MemcastMotion": 0.00013000000035390258,
"PenguinizeFunctions": 0.00015999999595806003,
"PruneFunctions": 0.00016700000560376793,
"RemoveOptimizationBarriers": 0.00014099999680183828,
"ScatterMotion": 0.0041600000113248825,
"TensorizerLegalizationPass": 0.000155999994603917,
"VerifySupportedOps": 0.00023200000578071922,
"algsimp": 0.0020280000753700733,
"batchnorm_expander": 0.0007249999907799065,
"boundary-marker-removal": 0.0004140000091865659,
"call-inliner": 0.0002570000069681555,
"canonicalize-boundary-marker": 0.00044800000614486635,
"collective-stream-id-checker": 7.000000186963007e-05,
"comparison-expander": 0.00041700000292621553,
"computation-deduplicator": 0.0004440000047907233,
"conditional-to-select": 8.70000003487803e-05,
"config-lowering": 0.00020700000459328294,
"constant_folding": 0.00016900000628083944,
"cse": 0.00043799998820759356,
"dce": 3.899999865097925e-05,
"dynamic-slice-transpose": 0.00015799999528098851,
"eliminate-redundant-compare": 0.0001539999939268455,
"emit-offloaded-dropout": 0.0002770000137388706,
"flatten-call-graph": 0.000299000006634742,
"fuse-send-recv": 0.0015030000358819962,
"hilo::LegalizeAlias": 0.003281000070273876,
"hilo::NeuronInstCombine": 0.0011020000092685223,
"hilo::NeuronOpFusion": 0.0003429999924264848,
"hilo::ReplaceTokenTypeWithU8Pass": 0.00018600000475998968,
"hilo::ScheduleFusion": 3.5000000934815034e-05,
"hilo::SixtyFourHack": 0.00020599999697878957,
"hilo::VerifyAliasing": 7.000000186963007e-05,
"hlo-mac-count": 0.0006559999892488122,
"hlo-verifier": 0.006031000055372715,
"io-con-pipe-begin": 4.999999873689376e-06,
"io-con-pipe-end": 9.999999974752427e-07,
"io-layout-normalization": 0.0009500000160187483,
"legalize-ccops": 1.700000029813964e-05,
"legalize-compare": 0.00036899998667649925,
"lower-argminmax-custom-call": 0.00013800000306218863,
"map-inline": 0.0006319999811239541,
"metadata-naming": 0.0009749999735504389,
"mlir::detail::OpToOpPassAdaptor": 0.00022499999613501132,
"mlir::hlo::MhloToPyPenguin": 0.025104999542236328,
"mlir::mhlo::LowerComplexExtraPass": 0.002770999912172556,
"mlir::mhlo::LowerComplexPass": 0.001180000021122396,
"native-to-custom-softmax": 0.00041199999395757914,
"native-to-custom-softmax-dx": 0.00042600001324899495,
"operand_upcaster": 0.0007089999853633344,
"post-par-pipe-begin": 9.999999974752427e-07,
"post-par-pipe-end": 0.0,
"post-partition-simplification": 0.05639899894595146,
"pre-hlo-begin": 4.999999873689376e-06,
"pre-hlo-end": 9.999999974752427e-07,
"replace-minimum-constant": 0.0002209999947808683,
"reshape-mover": 7.400000322377309e-05,
"simplify-concat": 0.0018210000125691295,
"simplify-while-loops": 5.500000042957254e-05,
"transform-variadic-reduce": 0.0006440000142902136,
"tuple-simplifier": 0.00016700000560376793,
"unpack-nested-aws-ntwsr": 0.00035700001171790063,
"unroll-while-loop": 1.1000000085914508e-05
}
},
"cumsum": {
"compiletime": {
"CoalesceCCOp": 0.0008378028869628906,
"DMALocalityOpt": 0.0003306865692138672,
"DMAProfiler": 0.0007596015930175781,
"DataStreaming": 0.0002918243408203125,
"DoNothing": 0.00012636184692382813,
"ExpandISAMacro": 0.0005497932434082031,
"FactorizeBlkDims": 0.0004723072052001953,
"InferPSumTensor": 0.000583648681640625,
"LateLegalizeInst": 0.00040459632873535156,
"LateNeuronInstComb": 0.0004837512969970703,
"LegalizeSundaAccess": 0.0015611648559570313,
"LegalizeType": 0.00025010108947753906,
"LowerBroadcast": 0.0009808540344238281,
"LowerIntrinsics": 0.0002262592315673828,
"LowerTranspose": 0.00021767616271972656,
"NeuronInstComb": 0.0004963874816894531,
"NeuronLICM": 0.0006859302520751953,
"NeuronSimplifyPredicates": 0.002815723419189453,
"NeuronValueNumbering": 0.0004124641418457031,
"SFKVectorizer": 0.0027742385864257813,
"SimpleAllReduceTiling": 0.000209808349609375,
"SimplifyNeuronTensor": 0.00040721893310546875,
"SpillPSum": 0.0009286403656005859,
"WeightCoalescing": 0.0002105236053466797
}
},
"sg00": {
"hilo": {
"ArithmeticIntensity": 0.9200255870819092,
"HloMacCount": 3802996736.0,
"Traffic": 8267154432.0
}
},
"sg0000": {
"compiletime": {
"AGOrderingAnalysisPass": 1.4457588195800781,
"AffinePredicateResolution": 0.05167531967163086,
"AliasDependencyElimination": 0.0026276111602783203,
"AliasDependencyInduction": 0.44934630393981934,
"AliasDependencyReset": 1.2677826881408691,
"BFComputeCutting": 0.06423807144165039,
"BirCodeGenLoop": 2.421293258666992,
"CCOpFusion": 0.41050028800964355,
"CanonicalizeDAGForPGTiling": 0.21233797073364258,
"CanonicalizeIR": 0.06626629829406738,
"CoalesceCCOp": 0.1906270980834961,
"CommuteConcat": 0.03319668769836426,
"DMALocalityOpt": 0.03487658500671387,
"DMAProfiler": 0.08790731430053711,
"DMATilingProfiler": 0.07109546661376953,
"DataLocalityOpt": 1.910703182220459,
"DataStreaming": 0.15360140800476074,
"DeConcat": 0.012087583541870117,
"DeadCodeElimination": 0.035611867904663086,
"DeadStoreElimination": 0.37193870544433594,
"DelinearIndices": 0.2894127368927002,
"Delinearization": 0.1295926570892334,
"DoNothing": 6.914138793945313e-05,
"DramToDramTranspose": 1.0679569244384766,
"DumpGraphAndMetadata": 0.24142217636108398,
"EliminateDivs": 0.17337489128112793,
"ExpandBatchNorm": 0.06027984619140625,
"ExpandISAMacro": 0.09040713310241699,
"FactorizeBlkDims": 0.24898743629455566,
"FactorizeThreadAxesInFreeDims": 0.03613853454589844,
"FlattenMacroLoop": 0.26774168014526367,
"GenericAccessSimplifier": 0.03175926208496094,
"InferInitValue": 1.029360294342041,
"InferIntrinsicOnCC": 0.34307408332824707,
"InferNeuronTensor": 1.7935998439788818,
"InferNonlocalTensors": 3.6307339668273926,
"InferPSumTensor": 0.977715015411377,
"InlineNativeKernels": 0.05374264717102051,
"InsertIOTransposes": 1.162278652191162,
"InsertLocalTransposes": 1.0349645614624023,
"InsertOffloadedTransposes": 0.0943443775177002,
"LICM": 0.1061861515045166,
"LateLegalizeInst": 0.22713756561279297,
"LateLegalizePostSplit": 0.09247255325317383,
"LateLowerReshapeOp": 0.04053616523742676,
"LateLowerTensorOp": 0.3356895446777344,
"LateNeuronInstComb": 0.45120882987976074,
"LayoutPreprocessing": 0.9441671371459961,
"LayoutPreprocessingAndAnalysis": 1.2680203914642334,
"LayoutRequirementAnalysis": 0.309098482131958,
"LegalizeCCOpLayout": 0.07318258285522461,
"LegalizeOpLevelAlias": 0.03343796730041504,
"LegalizePartitionReduce": 0.034781694412231445,
"LegalizeSundaAccess": 1.4543089866638184,
"LegalizeSundaMacro": 0.37755250930786133,
"LegalizeType": 0.20833444595336914,
"LocalLayoutOpt": 0.36218762397766113,
"LoopFusion": 0.31240200996398926,
"LoopSplitting": 0.013066768646240234,
"LowerBroadcast": 0.04690980911254883,
"LowerCCOpBlockAxis": 0.23094987869262695,
"LowerComplexBroadcast": 0.15572404861450195,
"LowerIntrinsics": 1.2286322116851807,
"LowerTensorOp": 0.4897449016571045,
"LowerTranspose": 0.39931535720825195,
"MacroGeneration": 2.335334062576294,
"MaskPropagation": 0.14433836936950684,
"MemcpyElimination": 3.9867260456085205,
"MutateDataType": 0.04344511032104492,
"NeuronAliasDependencyInduction": 0.025929927825927734,
"NeuronAliasDependencyReset": 0.04254412651062012,
"NeuronInstComb": 0.19301342964172363,
"NeuronLICM": 0.2890663146972656,
"NeuronLoopFusion": 0.4089043140411377,
"NeuronLoopInterchange": 0.04476189613342285,
"NeuronSimplifier": 0.30055856704711914,
"NeuronSimplifyPredicates": 0.1793985366821289,
"NeuronValueNumbering": 0.10622000694274902,
"OptimizeAliasedCopyChain": 0.01511383056640625,
"OptimizeNKIKernels": 0.4606451988220215,
"PAGLayoutOpt": 26.32272720336914,
"PComputeCutting": 0.302201509475708,
"PGLayoutTilingPipeline": 38.88710403442383,
"PGTiling": 4.423768043518066,
"PadElimination": 0.008622884750366211,
"ParAxesAnnotation": 25.272018432617188,
"PartialLoopFusion": 0.2368309497833252,
"PartialSimdFusion": 0.20722246170043945,
"PerfectLoopNest": 0.06273055076599121,
"RecognizeOpIdiom": 0.20455479621887207,
"Recompute": 0.00649714469909668,
"RelaxPredicates": 0.154876708984375,
"Rematerialization": 0.16764259338378906,
"ReshapeWeights": 0.021569013595581055,
"ResolveAccessConflict": 0.24012255668640137,
"ResolveComplicatePredicates": 0.05034017562866211,
"RewriteReplicationMatmul": 0.04589343070983887,
"RewriteWeights": 0.05840659141540527,
"SFKVectorizer": 3.119982957839966,
"SimpleAllReduceTiling": 0.06573367118835449,
"Simplifier": 0.11366057395935059,
"SimplifyMacroPredicates": 0.18840670585632324,
"SimplifyNeuronTensor": 1.3295373916625977,
"SimplifySlice": 0.03386688232421875,
"SimplifyTensor": 0.21405529975891113,
"SpillPSum": 0.5431830883026123,
"SplitAPUnionSets": 0.3313255310058594,
"SplitAccGrp": 0.03839588165283203,
"StaticProfiler": 0.13296246528625488,
"StaticTransposeLocalTensor": 0.21724367141723633,
"SundaISel": 1.6302134990692139,
"TCTransform": 0.03438615798950195,
"TensorInitialization": 0.13414645195007324,
"TensorOpSimplifier": 0.27712535858154297,
"TensorOpTransform": 0.8646912574768066,
"TileCCOps": 0.263721227645874,
"TilingProfiler": 0.39296984672546387,
"TransformConvOp": 0.06336498260498047,
"TritiumFusion": 1.0901517868041992,
"ValueNumbering": 0.09328150749206543,
"VectorizeDMA": 0.03394460678100586,
"VectorizeMatMult": 0.0209348201751709,
"WeightCoalescing": 0.05463147163391113,
"ZeroSizeTensorElimination": 0.0004336833953857422
},
"tensorizer": {
"DMATilingProfiler::TotalInstructionsAfterTiling": 262321,
"StaticProfiler::AifUb": 10.559271812438965,
"StaticProfiler::ArithmeticIntensityTensorizer": 11.613152503967285,
"StaticProfiler::AverageDmaLength": 6652.8759765625,
"StaticProfiler::AverageFractalPeUtilization": 99.8321762084961,
"StaticProfiler::AveragePartitionUtilization": 99.3888168334961,
"StaticProfiler::AveragePeUtilization": 99.65400695800781,
"StaticProfiler::DDRTransferBytes": 7587185496,
"StaticProfiler::InternalTransferBytes": 632323092,
"StaticProfiler::LoadExpanded": 1033407,
"StaticProfiler::LocalizationEfficiency": 109.9806137084961,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 110.06793212890625,
"StaticProfiler::StoreExpanded": 3422,
"StaticProfiler::TotalDMAExpanded": 1036829,
"StaticProfiler::TotalDynamicInstancesCount": 275548,
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 274994,
"StaticProfiler::TotalLNCComm": 0,
"StaticProfiler::TotalLNCCommTransfer": 0,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0,
"TilingProfiler::AveragePeUtilizationAfterTiling": 0,
"TilingProfiler::BatchnormInstructionsAfterTiling": 0,
"TilingProfiler::DmaInstructionsAfterTiling": 0,
"TilingProfiler::GenericInstructionsAfterTiling": 79,
"TilingProfiler::MatMultInstructionsAfterTiling": 231408,
"TilingProfiler::NumPfTransposes": 398,
"TilingProfiler::NumPfTransposesForIo": 37,
"TilingProfiler::NumPfTransposesForLocal": 216,
"TilingProfiler::NumPfTransposesForNonlocal": 145,
"TilingProfiler::PfTransposeInstructions": 19513,
"TilingProfiler::PfTransposeInstructionsForIo": 19152,
"TilingProfiler::PfTransposeInstructionsForLocal": 216,
"TilingProfiler::PfTransposeInstructionsForNonlocal": 145,
"TilingProfiler::ReduceInstructionsAfterTiling": 74,
"TilingProfiler::SimdInstructionsAfterTiling": 2999,
"TilingProfiler::TotalInstructionsAfterTiling": 0,
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0,
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0,
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0,
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0,
"TransformConvOp::conv2d_column_packing": 0,
"TransformConvOp::conv2d_column_packing_1": 0,
"TransformConvOp::conv2d_column_packing_io10": 0,
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0
}
}
}