Qwen3-8BSharded / context_encoding_model /_tp0_bk2 /global_metric_store.json
jburtoft's picture
Upload folder using huggingface_hub
ee61cf7 verified
{
"Average": {
"tensorizer": {
"StaticProfiler::AverageFractalPeUtilization": 99.66542053222656,
"StaticProfiler::AveragePartitionUtilization": 97.7269515991211,
"StaticProfiler::AveragePeUtilization": 98.64861297607422,
"StaticProfiler::LocalizationEfficiency": 98.26979064941406,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 101.01405334472656,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0
}
},
"Count": {
"tensorizer": {
"StaticProfiler::AverageFractalPeUtilization": 1.0,
"StaticProfiler::AveragePartitionUtilization": 1.0,
"StaticProfiler::AveragePeUtilization": 1.0,
"StaticProfiler::LocalizationEfficiency": 1.0,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1.0,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 1.0,
"TilingProfiler::AveragePeUtilizationAfterTiling": 1.0
}
},
"Sum": {
"compiletime": {
"AGOrderingAnalysisPass": 0.018257856369018555,
"AffinePredicateResolution": 0.0011677742004394531,
"AliasDependencyElimination": 0.0001201629638671875,
"AliasDependencyInduction": 0.0052988529205322266,
"AliasDependencyReset": 0.029210567474365234,
"BFComputeCutting": 0.0032625198364257813,
"BirCodeGenLoop": 0.4527714252471924,
"CCOpFusion": 0.02410125732421875,
"CanonicalizeConv": 0.00029399999766610563,
"CanonicalizeDAGForPGTiling": 0.004324913024902344,
"CanonicalizeForTensorizer": 4.8000001697801054e-05,
"CanonicalizeIR": 0.0019502639770507813,
"Canonicalizer": 0.0010809999657794833,
"CoalesceCCOp": 0.014672040939331055,
"CommuteConcat": 0.0008339881896972656,
"DMALocalityOpt": 0.005767107009887695,
"DMAProfiler": 0.012850046157836914,
"DMATilingProfiler": 0.004332065582275391,
"DataLocalityOpt": 0.07260942459106445,
"DataStreaming": 0.03969836235046387,
"DeConcat": 0.0005326271057128906,
"DeadCodeElimination": 0.0009255409240722656,
"DeadStoreElimination": 0.0055675506591796875,
"DelinearIndices": 0.004735231399536133,
"Delinearization": 0.0030374526977539063,
"DoNothing": 0.00018930435180664063,
"DramToDramTranspose": 0.018135547637939453,
"DumpGraphAndMetadata": 0.09476375579833984,
"EliminateDivs": 0.002595663070678711,
"ExpandBatchNorm": 0.002063274383544922,
"ExpandISAMacro": 0.011973381042480469,
"FactorizeBlkDims": 0.009292840957641602,
"FactorizeThreadAxesInFreeDims": 0.0010046958923339844,
"FlattenMacroLoop": 0.002232074737548828,
"GenericAccessSimplifier": 0.0018167495727539063,
"HoistCompute": 7.999999979801942e-06,
"IdentifyCrossPassTensors": 7.79999973019585e-05,
"InferInitValue": 0.024865150451660156,
"InferIntrinsicOnCC": 0.009101152420043945,
"InferNeuronTensor": 0.023293495178222656,
"InferNonlocalTensors": 0.01632833480834961,
"InferPSumTensor": 0.27726316452026367,
"InlineNativeKernels": 0.0081634521484375,
"InsertIOTransposes": 0.019203901290893555,
"InsertLocalTransposes": 0.0042340755462646484,
"InsertOffloadedTransposes": 0.002811431884765625,
"LICM": 0.0029730796813964844,
"LateLegalizeInst": 0.014307022094726563,
"LateLegalizePostSplit": 0.012536048889160156,
"LateLowerReshapeOp": 0.0018641948699951172,
"LateLowerTensorOp": 0.0014081001281738281,
"LateNeuronInstComb": 0.00915217399597168,
"LayoutPreprocessing": 0.02658390998840332,
"LayoutPreprocessingAndAnalysis": 0.10707235336303711,
"LayoutRequirementAnalysis": 0.005135536193847656,
"LegalizeCCOpLayout": 0.002307415008544922,
"LegalizeOpLevelAlias": 0.0012297630310058594,
"LegalizePartitionReduce": 0.0010194778442382813,
"LegalizeSundaAccess": 0.07808256149291992,
"LegalizeSundaMacro": 0.010968446731567383,
"LegalizeType": 0.012074947357177734,
"LocalLayoutOpt": 0.013799905776977539,
"LoopFusion": 0.0052182674407958984,
"LoopSplitting": 0.0003161430358886719,
"LowerBroadcast": 0.0015821456909179688,
"LowerCCOpBlockAxis": 0.0040547847747802734,
"LowerComplexBroadcast": 0.002165079116821289,
"LowerIntrinsics": 0.31156492233276367,
"LowerTensorOp": 0.010558843612670898,
"LowerTranspose": 0.012494325637817383,
"MacroGeneration": 0.029862642288208008,
"MaskPropagation": 0.002757549285888672,
"MemcastMotion": 3.400000059627928e-05,
"MemcpyElimination": 0.025969266891479492,
"MutateDataType": 0.002087831497192383,
"NeuronAliasDependencyInduction": 0.00016880035400390625,
"NeuronAliasDependencyReset": 0.020352602005004883,
"NeuronInstComb": 0.004656076431274414,
"NeuronLICM": 0.03560137748718262,
"NeuronLoopFusion": 0.007991313934326172,
"NeuronLoopInterchange": 0.002409219741821289,
"NeuronSimplifier": 0.007069587707519531,
"NeuronSimplifyPredicates": 0.12419009208679199,
"NeuronValueNumbering": 0.0032753944396972656,
"OptimizeAliasedCopyChain": 0.0005936622619628906,
"OptimizeNKIKernels": 0.5374257564544678,
"PAGLayoutOpt": 0.08115577697753906,
"PComputeCutting": 0.004801273345947266,
"PGLayoutTilingPipeline": 0.5454635620117188,
"PGTiling": 0.14933419227600098,
"PadElimination": 0.00034046173095703125,
"ParAxesAnnotation": 0.053552865982055664,
"PartialLoopFusion": 0.0067539215087890625,
"PartialSimdFusion": 0.00693058967590332,
"PenguinizeFunctions": 4.5000000682193786e-05,
"PerfectLoopNest": 0.0035321712493896484,
"PruneFunctions": 5.199999941396527e-05,
"RecognizeOpIdiom": 0.003947257995605469,
"Recompute": 0.00024962425231933594,
"RelaxPredicates": 0.013285398483276367,
"Rematerialization": 0.002062082290649414,
"RemoveOptimizationBarriers": 8.70000003487803e-05,
"ReshapeWeights": 0.002131223678588867,
"ResolveAccessConflict": 0.0038597583770751953,
"ResolveComplicatePredicates": 0.002032756805419922,
"RewriteReplicationMatmul": 0.001924753189086914,
"RewriteWeights": 0.002452373504638672,
"SFKVectorizer": 0.2718319892883301,
"ScatterMotion": 3.7999998312443495e-05,
"SimpleAllReduceTiling": 0.008960247039794922,
"Simplifier": 0.004038810729980469,
"SimplifyMacroPredicates": 0.010622739791870117,
"SimplifyNeuronTensor": 1.0594146251678467,
"SimplifySlice": 0.0009577274322509766,
"SimplifyTensor": 0.005341768264770508,
"SpillPSum": 0.012076139450073242,
"SplitAPUnionSets": 0.10771751403808594,
"SplitAccGrp": 0.002201557159423828,
"StaticProfiler": 0.012447118759155273,
"StaticTransposeLocalTensor": 0.0038712024688720703,
"SundaISel": 0.04214668273925781,
"TCTransform": 0.0008432865142822266,
"TensorInitialization": 0.012825727462768555,
"TensorOpSimplifier": 0.004651308059692383,
"TensorOpTransform": 0.019537687301635742,
"TensorizerLegalizationPass": 5.7999997807201e-05,
"TileCCOps": 0.006766319274902344,
"TilingProfiler": 0.006911277770996094,
"TransformConvOp": 0.0030303001403808594,
"TritiumFusion": 0.04502224922180176,
"ValueNumbering": 0.001996755599975586,
"VectorizeDMA": 0.0019402503967285156,
"VectorizeMatMult": 0.0027413368225097656,
"VerifySupportedOps": 3.7000001611886546e-05,
"WeightCoalescing": 0.008520841598510742,
"ZeroSizeTensorElimination": 0.00013709068298339844,
"algsimp": 0.0026940000243484974,
"batchnorm_expander": 4.400000034365803e-05,
"boundary-marker-removal": 1.5999999959603883e-05,
"call-inliner": 0.00046999999904073775,
"canonicalize-boundary-marker": 1.8999999156221747e-05,
"collective-stream-id-checker": 7.300000288523734e-05,
"comparison-expander": 0.0005740000051446259,
"computation-deduplicator": 7.999999797903001e-05,
"conditional-to-select": 1.8000000636675395e-05,
"config-lowering": 0.0003279999946244061,
"constant-statistics": 0.0005329999839887023,
"constant_folding": 0.0003260000084992498,
"cse": 4.5000000682193786e-05,
"dce": 8.399999933317304e-05,
"dot_decomposer": 0.0013409999664872885,
"dynamic-slice-transpose": 1.3999999282532372e-05,
"eliminate-redundant-compare": 0.0002959999837912619,
"emit-offloaded-dropout": 6.399999983841553e-05,
"flatten-call-graph": 0.0009319999953731894,
"fuse-send-recv": 6.999999459367245e-05,
"hilo::LegalizeAlias": 1.3999999282532372e-05,
"hilo::NeuronInstCombine": 0.0001660000125411898,
"hilo::NeuronOpFusion": 2.5000001187436283e-05,
"hilo::ReplaceTokenTypeWithU8Pass": 5.2999999752501026e-05,
"hilo::ScheduleFusion": 7.000000096013537e-06,
"hilo::SixtyFourHack": 7.299999560927972e-05,
"hilo::VerifyAliasing": 6.000000212225132e-06,
"hlo-mac-count": 0.0013429999817162752,
"hlo-verifier": 0.007542999926954508,
"instruction-histogram": 0.0006709999870508909,
"io-con-pipe-begin": 4.999999873689376e-06,
"io-con-pipe-end": 9.999999974752427e-07,
"io-layout-normalization": 0.001310999970883131,
"io-statistics": 8.499999967170879e-05,
"legalize-ccops": 3.999999989900971e-06,
"legalize-compare": 1.2999999853491317e-05,
"lower-argminmax-custom-call": 1.300000076298602e-05,
"map-inline": 0.0008850000449456275,
"metadata-naming": 5.999999848427251e-05,
"mlir::detail::OpToOpPassAdaptor": 0.00014399999054148793,
"mlir::hlo::MhloToPyPenguin": 0.004429999738931656,
"mlir::mhlo::LowerComplexExtraPass": 0.00027299998328089714,
"mlir::mhlo::LowerComplexPass": 0.0004909999552182853,
"native-to-custom-softmax": 0.0007070000283420086,
"native-to-custom-softmax-dx": 0.0005990000208839774,
"operand_upcaster": 4.900000203633681e-05,
"opt-barrier-removal": 0.0005510000046342611,
"post-par-pipe-begin": 8.999999408842996e-06,
"post-par-pipe-end": 0.0,
"post-partition-simplification": 0.0018570000538602471,
"pre-par-pipe-begin": 9.999999974752427e-07,
"pre-par-pipe-end": 0.0,
"pre-partition-simplification": 0.12893199920654297,
"replace-minimum-constant": 0.0004569999873638153,
"reshape-mover": 0.00012599999899975955,
"simplify-concat": 0.00015899998834356666,
"simplify-while-loops": 0.00010400000610388815,
"transform-variadic-reduce": 7.000000186963007e-05,
"tuple-simplifier": 0.0003150000120513141,
"unpack-nested-aws-ntwsr": 0.0004349999944679439,
"unroll-while-loop": 2.099999983329326e-05,
"zero_sized_hlo_elimination": 0.0008670000243000686
},
"hilo": {
"ConstantSize": 1189157.0,
"HloInputCount": 475.0,
"HloMacCount": 101242896384.0,
"HloOutputCount": 73.0,
"IfmapSize": 8266545152.0,
"OfmapSize": 75497472.0,
"OutputsReadFromCount": 0.0,
"PassthroughTensorsCount": 0.0,
"RedundantOutputCount": 0.0,
"Traffic": 1692493184.0
},
"tensorizer": {
"DMATilingProfiler::TotalInstructionsAfterTiling": 44382.0,
"StaticProfiler::AifUb": 205.154296875,
"StaticProfiler::ArithmeticIntensityTensorizer": 201.6046905517578,
"StaticProfiler::AverageDmaLength": 1901.806396484375,
"StaticProfiler::DDRTransferBytes": 795531072.0,
"StaticProfiler::InternalTransferBytes": 646388224.0,
"StaticProfiler::LoadExpanded": 376342.0,
"StaticProfiler::StoreExpanded": 4189.0,
"StaticProfiler::TotalDMAExpanded": 380531.0,
"StaticProfiler::TotalDynamicInstancesCount": 53882.0,
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 53436.0,
"StaticProfiler::TotalLNCComm": 0.0,
"StaticProfiler::TotalLNCCommTransfer": 0.0,
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
"TilingProfiler::GenericInstructionsAfterTiling": 4.0,
"TilingProfiler::MatMultInstructionsAfterTiling": 23616.0,
"TilingProfiler::NumPfTransposes": 5.0,
"TilingProfiler::NumPfTransposesForIo": 1.0,
"TilingProfiler::NumPfTransposesForLocal": 1.0,
"TilingProfiler::NumPfTransposesForNonlocal": 3.0,
"TilingProfiler::PfTransposeInstructions": 19393.0,
"TilingProfiler::PfTransposeInstructionsForIo": 19008.0,
"TilingProfiler::PfTransposeInstructionsForLocal": 1.0,
"TilingProfiler::PfTransposeInstructionsForNonlocal": 384.0,
"TilingProfiler::ReduceInstructionsAfterTiling": 4.0,
"TilingProfiler::SimdInstructionsAfterTiling": 158.0,
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
"TransformConvOp::conv2d_column_packing": 0.0,
"TransformConvOp::conv2d_column_packing_1": 0.0,
"TransformConvOp::conv2d_column_packing_io10": 0.0,
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
}
},
"all": {
"compiletime": {
"algsimp": 0.002466999925673008,
"call-inliner": 0.0004360000020824373,
"collective-stream-id-checker": 6.299999949987978e-05,
"comparison-expander": 0.0005569999921135604,
"constant-statistics": 0.0005329999839887023,
"constant_folding": 0.0002969999914057553,
"dce": 7.999999797903001e-05,
"dot_decomposer": 0.0013409999664872885,
"eliminate-redundant-compare": 0.00028199999360367656,
"flatten-call-graph": 0.0008999999845400453,
"hlo-mac-count": 0.0010720000136643648,
"hlo-verifier": 0.0069679999724030495,
"instruction-histogram": 0.0006709999870508909,
"io-con-pipe-begin": 4.999999873689376e-06,
"io-con-pipe-end": 9.999999974752427e-07,
"io-layout-normalization": 0.001310999970883131,
"io-statistics": 8.499999967170879e-05,
"map-inline": 0.0008440000237897038,
"native-to-custom-softmax": 0.0006750000175088644,
"native-to-custom-softmax-dx": 0.0005000000237487257,
"opt-barrier-removal": 0.0005510000046342611,
"pre-par-pipe-begin": 9.999999974752427e-07,
"pre-par-pipe-end": 0.0,
"pre-partition-simplification": 0.12893199920654297,
"replace-minimum-constant": 0.0004309999931138009,
"reshape-mover": 0.00011500000255182385,
"simplify-while-loops": 9.600000339560211e-05,
"tuple-simplifier": 0.0002969999914057553,
"unpack-nested-aws-ntwsr": 0.00042100000428035855,
"unroll-while-loop": 1.9999999494757503e-05,
"zero_sized_hlo_elimination": 0.0008670000243000686
}
},
"cumsum": {
"compiletime": {
"CoalesceCCOp": 0.00020885467529296875,
"DMALocalityOpt": 0.00016832351684570313,
"DMAProfiler": 0.0007588863372802734,
"DataStreaming": 0.00029587745666503906,
"DoNothing": 0.00011897087097167969,
"ExpandISAMacro": 0.0005011558532714844,
"FactorizeBlkDims": 0.00043463706970214844,
"InferPSumTensor": 0.00044608116149902344,
"LateLegalizeInst": 0.0004031658172607422,
"LateNeuronInstComb": 0.0005033016204833984,
"LegalizeSundaAccess": 0.0021431446075439453,
"LegalizeType": 0.00024056434631347656,
"LowerBroadcast": 0.00022101402282714844,
"LowerIntrinsics": 0.00023508071899414063,
"LowerTranspose": 0.0002219676971435547,
"NeuronInstComb": 0.0005297660827636719,
"NeuronLICM": 0.00041484832763671875,
"NeuronSimplifyPredicates": 0.0028023719787597656,
"NeuronValueNumbering": 0.00043582916259765625,
"SFKVectorizer": 0.002759695053100586,
"SimpleAllReduceTiling": 0.00020432472229003906,
"SimplifyNeuronTensor": 0.0004029273986816406,
"SpillPSum": 0.0005388259887695313,
"WeightCoalescing": 0.0002307891845703125
}
},
"sg00": {
"compiletime": {
"CanonicalizeConv": 2.300000051036477e-05,
"CanonicalizeForTensorizer": 2.300000051036477e-05,
"Canonicalizer": 0.0005249999812804163,
"HoistCompute": 3.000000106112566e-06,
"IdentifyCrossPassTensors": 3.099999958067201e-05,
"MemcastMotion": 9.999999747378752e-06,
"PenguinizeFunctions": 2.2000000171829015e-05,
"PruneFunctions": 1.2999999853491317e-05,
"RemoveOptimizationBarriers": 4.400000034365803e-05,
"ScatterMotion": 6.000000212225132e-06,
"TensorizerLegalizationPass": 3.600000127335079e-05,
"VerifySupportedOps": 1.700000029813964e-05,
"algsimp": 0.0001049999991664663,
"batchnorm_expander": 1.8999999156221747e-05,
"boundary-marker-removal": 7.000000096013537e-06,
"call-inliner": 1.4000000192027073e-05,
"canonicalize-boundary-marker": 7.999999979801942e-06,
"collective-stream-id-checker": 3.999999989900971e-06,
"comparison-expander": 7.000000096013537e-06,
"computation-deduplicator": 2.099999983329326e-05,
"conditional-to-select": 7.000000096013537e-06,
"config-lowering": 0.00027600000612437725,
"constant_folding": 1.2000000424450263e-05,
"cse": 2.2000000171829015e-05,
"dce": 1.9999999949504854e-06,
"dynamic-slice-transpose": 6.000000212225132e-06,
"eliminate-redundant-compare": 6.000000212225132e-06,
"emit-offloaded-dropout": 3.7999998312443495e-05,
"flatten-call-graph": 1.2999999853491317e-05,
"fuse-send-recv": 3.099999958067201e-05,
"hilo::LegalizeAlias": 7.000000096013537e-06,
"hilo::NeuronInstCombine": 6.299999949987978e-05,
"hilo::NeuronOpFusion": 6.000000212225132e-06,
"hilo::ReplaceTokenTypeWithU8Pass": 2.300000051036477e-05,
"hilo::ScheduleFusion": 1.9999999949504854e-06,
"hilo::SixtyFourHack": 2.099999983329326e-05,
"hilo::VerifyAliasing": 3.000000106112566e-06,
"hlo-mac-count": 7.300000288523734e-05,
"hlo-verifier": 0.00023600000713486224,
"legalize-ccops": 1.9999999949504854e-06,
"legalize-compare": 6.000000212225132e-06,
"lower-argminmax-custom-call": 6.000000212225132e-06,
"map-inline": 1.700000029813964e-05,
"metadata-naming": 2.499999936844688e-05,
"mlir::detail::OpToOpPassAdaptor": 2.2000000171829015e-05,
"mlir::hlo::MhloToPyPenguin": 0.002633000025525689,
"mlir::mhlo::LowerComplexExtraPass": 0.0001049999991664663,
"mlir::mhlo::LowerComplexPass": 0.00017299999308306724,
"native-to-custom-softmax": 2.099999983329326e-05,
"native-to-custom-softmax-dx": 6.600000051548705e-05,
"operand_upcaster": 2.2000000171829015e-05,
"post-par-pipe-begin": 4.999999873689376e-06,
"post-par-pipe-end": 0.0,
"post-partition-simplification": 0.0008430000161752105,
"replace-minimum-constant": 1.1000000085914508e-05,
"reshape-mover": 4.999999873689376e-06,
"simplify-concat": 6.70000008540228e-05,
"simplify-while-loops": 3.999999989900971e-06,
"transform-variadic-reduce": 1.2999999853491317e-05,
"tuple-simplifier": 7.999999979801942e-06,
"unpack-nested-aws-ntwsr": 6.000000212225132e-06,
"unroll-while-loop": 9.999999974752427e-07
},
"hilo": {
"ArithmeticIntensity": 34.445003509521484,
"ConstantSize": 1189157.0,
"HloInputCount": 475.0,
"HloMacCount": 11811160064.0,
"HloOutputCount": 73.0,
"IfmapSize": 8266545152.0,
"OfmapSize": 75497472.0,
"OutputsReadFromCount": 0.0,
"PassthroughTensorsCount": 0.0,
"RedundantOutputCount": 0.0,
"Traffic": 685798208.0
}
},
"sg0000": {
"compiletime": {
"AGOrderingAnalysisPass": 0.07801461219787598,
"AffinePredicateResolution": 0.0017647743225097656,
"AliasDependencyElimination": 0.0001277923583984375,
"AliasDependencyInduction": 0.00855708122253418,
"AliasDependencyReset": 0.08457040786743164,
"BFComputeCutting": 0.003294229507446289,
"BirCodeGenLoop": 0.05274701118469238,
"CCOpFusion": 0.030017614364624023,
"CanonicalizeDAGForPGTiling": 0.003341197967529297,
"CanonicalizeIR": 0.0022792816162109375,
"CoalesceCCOp": 0.0053555965423583984,
"CommuteConcat": 0.0023560523986816406,
"DMALocalityOpt": 0.0013885498046875,
"DMAProfiler": 0.00625157356262207,
"DMATilingProfiler": 0.003763914108276367,
"DataLocalityOpt": 0.09786868095397949,
"DataStreaming": 0.004992246627807617,
"DeConcat": 0.002264261245727539,
"DeadCodeElimination": 0.002042531967163086,
"DeadStoreElimination": 0.030755043029785156,
"DelinearIndices": 0.009100914001464844,
"Delinearization": 0.004424571990966797,
"DoNothing": 6.914138793945313e-05,
"DramToDramTranspose": 0.03130936622619629,
"DumpGraphAndMetadata": 0.005283832550048828,
"EliminateDivs": 0.0042150020599365234,
"ExpandBatchNorm": 0.0019366741180419922,
"ExpandISAMacro": 0.002724170684814453,
"FactorizeBlkDims": 0.011873722076416016,
"FactorizeThreadAxesInFreeDims": 0.002283811569213867,
"FlattenMacroLoop": 0.0031974315643310547,
"GenericAccessSimplifier": 0.002216339111328125,
"InferInitValue": 0.030458927154541016,
"InferIntrinsicOnCC": 0.011402368545532227,
"InferNeuronTensor": 0.04513859748840332,
"InferNonlocalTensors": 0.10613727569580078,
"InferPSumTensor": 0.037427663803100586,
"InlineNativeKernels": 0.00368499755859375,
"InsertIOTransposes": 0.012629508972167969,
"InsertLocalTransposes": 0.007400989532470703,
"InsertOffloadedTransposes": 0.0025758743286132813,
"LICM": 0.0031554698944091797,
"LateLegalizeInst": 0.005858182907104492,
"LateLegalizePostSplit": 0.0029172897338867188,
"LateLowerReshapeOp": 0.0018696784973144531,
"LateLowerTensorOp": 0.004997968673706055,
"LateNeuronInstComb": 0.019808530807495117,
"LayoutPreprocessing": 0.04119300842285156,
"LayoutPreprocessingAndAnalysis": 0.10642147064208984,
"LayoutRequirementAnalysis": 0.0070705413818359375,
"LegalizeCCOpLayout": 0.004191398620605469,
"LegalizeOpLevelAlias": 0.0015521049499511719,
"LegalizePartitionReduce": 0.002257108688354492,
"LegalizeSundaAccess": 0.03900027275085449,
"LegalizeSundaMacro": 0.010483741760253906,
"LegalizeType": 0.0038602352142333984,
"LocalLayoutOpt": 0.01764845848083496,
"LoopFusion": 0.006066322326660156,
"LoopSplitting": 0.0015685558319091797,
"LowerBroadcast": 0.0020384788513183594,
"LowerCCOpBlockAxis": 0.005359172821044922,
"LowerComplexBroadcast": 0.0019440650939941406,
"LowerIntrinsics": 0.030491113662719727,
"LowerTensorOp": 0.012917041778564453,
"LowerTranspose": 0.010635852813720703,
"MacroGeneration": 0.06435012817382813,
"MaskPropagation": 0.0051097869873046875,
"MemcpyElimination": 0.11022067070007324,
"MutateDataType": 0.0014224052429199219,
"NeuronAliasDependencyInduction": 0.00023031234741210938,
"NeuronAliasDependencyReset": 0.021604061126708984,
"NeuronInstComb": 0.013072729110717773,
"NeuronLICM": 0.01006174087524414,
"NeuronLoopFusion": 0.017573833465576172,
"NeuronLoopInterchange": 0.0020608901977539063,
"NeuronSimplifier": 0.010074615478515625,
"NeuronSimplifyPredicates": 0.0060672760009765625,
"NeuronValueNumbering": 0.0041046142578125,
"OptimizeAliasedCopyChain": 0.0014190673828125,
"OptimizeNKIKernels": 0.0021109580993652344,
"PAGLayoutOpt": 0.3779466152191162,
"PComputeCutting": 0.008729696273803711,
"PGLayoutTilingPipeline": 1.5334703922271729,
"PGTiling": 0.47260475158691406,
"PadElimination": 0.0015625953674316406,
"ParAxesAnnotation": 0.2937772274017334,
"PartialLoopFusion": 0.016366004943847656,
"PartialSimdFusion": 0.01980447769165039,
"PerfectLoopNest": 0.0021877288818359375,
"RecognizeOpIdiom": 0.004831075668334961,
"Recompute": 0.00025010108947753906,
"RelaxPredicates": 0.0039484500885009766,
"Rematerialization": 0.004274129867553711,
"ReshapeWeights": 0.000804901123046875,
"ResolveAccessConflict": 0.0038733482360839844,
"ResolveComplicatePredicates": 0.0016858577728271484,
"RewriteReplicationMatmul": 0.0014014244079589844,
"RewriteWeights": 0.00405120849609375,
"SFKVectorizer": 0.20196890830993652,
"SimpleAllReduceTiling": 0.002203702926635742,
"Simplifier": 0.004297018051147461,
"SimplifyMacroPredicates": 0.01361393928527832,
"SimplifyNeuronTensor": 0.009984970092773438,
"SimplifySlice": 0.0010356903076171875,
"SimplifyTensor": 0.006205558776855469,
"SpillPSum": 0.016466140747070313,
"SplitAPUnionSets": 0.029446840286254883,
"SplitAccGrp": 0.0020453929901123047,
"StaticProfiler": 0.004591464996337891,
"StaticTransposeLocalTensor": 0.005173683166503906,
"SundaISel": 0.04554462432861328,
"TCTransform": 0.002426624298095703,
"TensorInitialization": 0.009510517120361328,
"TensorOpSimplifier": 0.0067560672760009766,
"TensorOpTransform": 0.028885841369628906,
"TileCCOps": 0.005466938018798828,
"TilingProfiler": 0.013426065444946289,
"TransformConvOp": 0.002458810806274414,
"TritiumFusion": 0.0620732307434082,
"ValueNumbering": 0.002520322799682617,
"VectorizeDMA": 0.005783796310424805,
"VectorizeMatMult": 0.005175352096557617,
"WeightCoalescing": 0.0029850006103515625,
"ZeroSizeTensorElimination": 0.00011801719665527344
},
"tensorizer": {
"DMATilingProfiler::TotalInstructionsAfterTiling": 2597.0,
"StaticProfiler::AifUb": 40.028141021728516,
"StaticProfiler::ArithmeticIntensityTensorizer": 420.0349426269531,
"StaticProfiler::AverageDmaLength": 1921.007568359375,
"StaticProfiler::AverageFractalPeUtilization": 99.95317840576172,
"StaticProfiler::AveragePartitionUtilization": 99.87249755859375,
"StaticProfiler::AveragePeUtilization": 99.80845642089844,
"StaticProfiler::DDRTransferBytes": 64558336.0,
"StaticProfiler::InternalTransferBytes": 52297728.0,
"StaticProfiler::LoadExpanded": 23298.0,
"StaticProfiler::LocalizationEfficiency": 1049.3489990234375,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1358.191162109375,
"StaticProfiler::StoreExpanded": 5505.0,
"StaticProfiler::TotalDMAExpanded": 28803.0,
"StaticProfiler::TotalDynamicInstancesCount": 3692.0,
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 3689.0,
"StaticProfiler::TotalLNCComm": 0.0,
"StaticProfiler::TotalLNCCommTransfer": 0.0,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
"TilingProfiler::GenericInstructionsAfterTiling": 48.0,
"TilingProfiler::MatMultInstructionsAfterTiling": 1412.0,
"TilingProfiler::NumPfTransposes": 7.0,
"TilingProfiler::NumPfTransposesForIo": 1.0,
"TilingProfiler::NumPfTransposesForLocal": 5.0,
"TilingProfiler::NumPfTransposesForNonlocal": 1.0,
"TilingProfiler::PfTransposeInstructions": 608.0,
"TilingProfiler::PfTransposeInstructionsForIo": 128.0,
"TilingProfiler::PfTransposeInstructionsForLocal": 416.0,
"TilingProfiler::PfTransposeInstructionsForNonlocal": 64.0,
"TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
"TilingProfiler::SimdInstructionsAfterTiling": 257.0,
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
"TransformConvOp::conv2d_column_packing": 0.0,
"TransformConvOp::conv2d_column_packing_1": 0.0,
"TransformConvOp::conv2d_column_packing_io10": 0.0,
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
}
},
"sg0001": {
"compiletime": {
"AGOrderingAnalysisPass": 0.03313565254211426,
"AffinePredicateResolution": 0.0015239715576171875,
"AliasDependencyElimination": 0.00011467933654785156,
"AliasDependencyInduction": 0.009088993072509766,
"AliasDependencyReset": 1.062025547027588,
"BFComputeCutting": 0.0024559497833251953,
"BirCodeGenLoop": 0.03748297691345215,
"CCOpFusion": 0.04092240333557129,
"CanonicalizeDAGForPGTiling": 0.004329681396484375,
"CanonicalizeIR": 0.002464771270751953,
"CoalesceCCOp": 0.004778146743774414,
"CommuteConcat": 0.0011680126190185547,
"DMALocalityOpt": 0.0016834735870361328,
"DMAProfiler": 0.0039997100830078125,
"DMATilingProfiler": 0.004555702209472656,
"DataLocalityOpt": 0.13762187957763672,
"DataStreaming": 0.0044286251068115234,
"DeConcat": 0.0015981197357177734,
"DeadCodeElimination": 0.0020780563354492188,
"DeadStoreElimination": 0.03435230255126953,
"DelinearIndices": 0.00969839096069336,
"Delinearization": 0.0038826465606689453,
"DoNothing": 9.846687316894531e-05,
"DramToDramTranspose": 0.03438973426818848,
"DumpGraphAndMetadata": 0.00426793098449707,
"EliminateDivs": 0.004217386245727539,
"ExpandBatchNorm": 0.0019202232360839844,
"ExpandISAMacro": 0.0024042129516601563,
"FactorizeBlkDims": 0.01425933837890625,
"FactorizeThreadAxesInFreeDims": 0.0026972293853759766,
"FlattenMacroLoop": 0.002768993377685547,
"GenericAccessSimplifier": 0.001058816909790039,
"InferInitValue": 0.03559255599975586,
"InferIntrinsicOnCC": 0.009636163711547852,
"InferNeuronTensor": 0.04922318458557129,
"InferNonlocalTensors": 0.030732393264770508,
"InferPSumTensor": 0.03249359130859375,
"InlineNativeKernels": 0.0014734268188476563,
"InsertIOTransposes": 0.021765470504760742,
"InsertLocalTransposes": 0.006593465805053711,
"InsertOffloadedTransposes": 0.0034906864166259766,
"LICM": 0.003262758255004883,
"LateLegalizeInst": 0.00400543212890625,
"LateLegalizePostSplit": 0.00289154052734375,
"LateLowerReshapeOp": 0.002287149429321289,
"LateLowerTensorOp": 0.0046651363372802734,
"LateNeuronInstComb": 0.019269704818725586,
"LayoutPreprocessing": 0.03711414337158203,
"LayoutPreprocessingAndAnalysis": 0.2516040802001953,
"LayoutRequirementAnalysis": 0.007753133773803711,
"LegalizeCCOpLayout": 0.003732919692993164,
"LegalizeOpLevelAlias": 0.0016019344329833984,
"LegalizePartitionReduce": 0.0020945072174072266,
"LegalizeSundaAccess": 0.016069650650024414,
"LegalizeSundaMacro": 0.010806083679199219,
"LegalizeType": 0.004706859588623047,
"LocalLayoutOpt": 0.02442765235900879,
"LoopFusion": 0.0067822933197021484,
"LoopSplitting": 0.00033974647521972656,
"LowerBroadcast": 0.0019419193267822266,
"LowerCCOpBlockAxis": 0.005570650100708008,
"LowerComplexBroadcast": 0.0020999908447265625,
"LowerIntrinsics": 0.03607368469238281,
"LowerTensorOp": 0.011876583099365234,
"LowerTranspose": 0.011530637741088867,
"MacroGeneration": 0.10653066635131836,
"MaskPropagation": 0.003092050552368164,
"MemcpyElimination": 0.10495471954345703,
"MutateDataType": 0.0014193058013916016,
"NeuronAliasDependencyInduction": 0.0002295970916748047,
"NeuronAliasDependencyReset": 0.021070480346679688,
"NeuronInstComb": 0.012903451919555664,
"NeuronLICM": 0.00844264030456543,
"NeuronLoopFusion": 0.020880460739135742,
"NeuronLoopInterchange": 0.0021686553955078125,
"NeuronSimplifier": 0.011090755462646484,
"NeuronSimplifyPredicates": 0.0016274452209472656,
"NeuronValueNumbering": 0.004062652587890625,
"OptimizeAliasedCopyChain": 0.0014641284942626953,
"OptimizeNKIKernels": 0.0023856163024902344,
"PAGLayoutOpt": 0.17638587951660156,
"PComputeCutting": 0.00709986686706543,
"PGLayoutTilingPipeline": 1.142796516418457,
"PGTiling": 0.39766955375671387,
"PadElimination": 0.0015380382537841797,
"ParAxesAnnotation": 0.09186458587646484,
"PartialLoopFusion": 0.015995025634765625,
"PartialSimdFusion": 0.026766300201416016,
"PerfectLoopNest": 0.002192258834838867,
"RecognizeOpIdiom": 0.004943370819091797,
"Recompute": 0.00025773048400878906,
"RelaxPredicates": 0.003591299057006836,
"Rematerialization": 0.0025196075439453125,
"ReshapeWeights": 0.0007069110870361328,
"ResolveAccessConflict": 0.00481104850769043,
"ResolveComplicatePredicates": 0.002285003662109375,
"RewriteReplicationMatmul": 0.0021715164184570313,
"RewriteWeights": 0.003401041030883789,
"SFKVectorizer": 0.14661574363708496,
"SimpleAllReduceTiling": 0.0016207695007324219,
"Simplifier": 0.00443577766418457,
"SimplifyMacroPredicates": 0.006165742874145508,
"SimplifyNeuronTensor": 0.006829500198364258,
"SimplifySlice": 0.0013000965118408203,
"SimplifyTensor": 0.0061337947845458984,
"SpillPSum": 0.018761634826660156,
"SplitAPUnionSets": 0.017923593521118164,
"SplitAccGrp": 0.002531290054321289,
"StaticProfiler": 0.003990888595581055,
"StaticTransposeLocalTensor": 0.004915952682495117,
"SundaISel": 0.04209589958190918,
"TCTransform": 0.0012347698211669922,
"TensorInitialization": 0.002599954605102539,
"TensorOpSimplifier": 0.006845712661743164,
"TensorOpTransform": 0.03345227241516113,
"TileCCOps": 0.005617856979370117,
"TilingProfiler": 0.015013933181762695,
"TransformConvOp": 0.002393960952758789,
"TritiumFusion": 0.09340715408325195,
"ValueNumbering": 0.0031540393829345703,
"VectorizeDMA": 0.0015842914581298828,
"VectorizeMatMult": 0.0071103572845458984,
"WeightCoalescing": 0.0026235580444335938,
"ZeroSizeTensorElimination": 0.0001163482666015625
},
"tensorizer": {
"DMATilingProfiler::TotalInstructionsAfterTiling": 7847.0,
"StaticProfiler::AifUb": 490.6532287597656,
"StaticProfiler::ArithmeticIntensityTensorizer": 487.63507080078125,
"StaticProfiler::AverageDmaLength": 869.1515502929688,
"StaticProfiler::AverageFractalPeUtilization": 100.0,
"StaticProfiler::AveragePartitionUtilization": 99.83790588378906,
"StaticProfiler::AveragePeUtilization": 100.0,
"StaticProfiler::DDRTransferBytes": 215827456.0,
"StaticProfiler::InternalTransferBytes": 43515904.0,
"StaticProfiler::LoadExpanded": 238976.0,
"StaticProfiler::LocalizationEfficiency": 99.38487243652344,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 107.76165771484375,
"StaticProfiler::StoreExpanded": 5121.0,
"StaticProfiler::TotalDMAExpanded": 244097.0,
"StaticProfiler::TotalDynamicInstancesCount": 9872.0,
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 9872.0,
"StaticProfiler::TotalLNCComm": 0.0,
"StaticProfiler::TotalLNCCommTransfer": 0.0,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
"TilingProfiler::GenericInstructionsAfterTiling": 32.0,
"TilingProfiler::MatMultInstructionsAfterTiling": 6016.0,
"TilingProfiler::NumPfTransposes": 8.0,
"TilingProfiler::NumPfTransposesForIo": 3.0,
"TilingProfiler::NumPfTransposesForLocal": 3.0,
"TilingProfiler::NumPfTransposesForNonlocal": 2.0,
"TilingProfiler::PfTransposeInstructions": 680.0,
"TilingProfiler::PfTransposeInstructionsForIo": 136.0,
"TilingProfiler::PfTransposeInstructionsForLocal": 288.0,
"TilingProfiler::PfTransposeInstructionsForNonlocal": 256.0,
"TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
"TilingProfiler::SimdInstructionsAfterTiling": 288.0,
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
"TransformConvOp::conv2d_column_packing": 0.0,
"TransformConvOp::conv2d_column_packing_1": 0.0,
"TransformConvOp::conv2d_column_packing_io10": 0.0,
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
}
},
"sg0002": {
"compiletime": {
"AGOrderingAnalysisPass": 0.018257856369018555,
"AffinePredicateResolution": 0.0011677742004394531,
"AliasDependencyElimination": 0.0001201629638671875,
"AliasDependencyInduction": 0.0052988529205322266,
"AliasDependencyReset": 0.029210567474365234,
"BFComputeCutting": 0.0032625198364257813,
"BirCodeGenLoop": 0.4527714252471924,
"CCOpFusion": 0.02410125732421875,
"CanonicalizeDAGForPGTiling": 0.004324913024902344,
"CanonicalizeIR": 0.0019502639770507813,
"CoalesceCCOp": 0.014463186264038086,
"CommuteConcat": 0.0008339881896972656,
"DMALocalityOpt": 0.005598783493041992,
"DMAProfiler": 0.01209115982055664,
"DMATilingProfiler": 0.004332065582275391,
"DataLocalityOpt": 0.07260942459106445,
"DataStreaming": 0.03940248489379883,
"DeConcat": 0.0005326271057128906,
"DeadCodeElimination": 0.0009255409240722656,
"DeadStoreElimination": 0.0055675506591796875,
"DelinearIndices": 0.004735231399536133,
"Delinearization": 0.0030374526977539063,
"DoNothing": 7.033348083496094e-05,
"DramToDramTranspose": 0.018135547637939453,
"DumpGraphAndMetadata": 0.09476375579833984,
"EliminateDivs": 0.002595663070678711,
"ExpandBatchNorm": 0.002063274383544922,
"ExpandISAMacro": 0.011472225189208984,
"FactorizeBlkDims": 0.008858203887939453,
"FactorizeThreadAxesInFreeDims": 0.0010046958923339844,
"FlattenMacroLoop": 0.002232074737548828,
"GenericAccessSimplifier": 0.0018167495727539063,
"InferInitValue": 0.024865150451660156,
"InferIntrinsicOnCC": 0.009101152420043945,
"InferNeuronTensor": 0.023293495178222656,
"InferNonlocalTensors": 0.01632833480834961,
"InferPSumTensor": 0.27681708335876465,
"InlineNativeKernels": 0.0081634521484375,
"InsertIOTransposes": 0.019203901290893555,
"InsertLocalTransposes": 0.0042340755462646484,
"InsertOffloadedTransposes": 0.002811431884765625,
"LICM": 0.0029730796813964844,
"LateLegalizeInst": 0.01390385627746582,
"LateLegalizePostSplit": 0.012536048889160156,
"LateLowerReshapeOp": 0.0018641948699951172,
"LateLowerTensorOp": 0.0014081001281738281,
"LateNeuronInstComb": 0.008648872375488281,
"LayoutPreprocessing": 0.02658390998840332,
"LayoutPreprocessingAndAnalysis": 0.10707235336303711,
"LayoutRequirementAnalysis": 0.005135536193847656,
"LegalizeCCOpLayout": 0.002307415008544922,
"LegalizeOpLevelAlias": 0.0012297630310058594,
"LegalizePartitionReduce": 0.0010194778442382813,
"LegalizeSundaAccess": 0.07593941688537598,
"LegalizeSundaMacro": 0.010968446731567383,
"LegalizeType": 0.011834383010864258,
"LocalLayoutOpt": 0.013799905776977539,
"LoopFusion": 0.0052182674407958984,
"LoopSplitting": 0.0003161430358886719,
"LowerBroadcast": 0.0013611316680908203,
"LowerCCOpBlockAxis": 0.0040547847747802734,
"LowerComplexBroadcast": 0.002165079116821289,
"LowerIntrinsics": 0.31132984161376953,
"LowerTensorOp": 0.010558843612670898,
"LowerTranspose": 0.012272357940673828,
"MacroGeneration": 0.029862642288208008,
"MaskPropagation": 0.002757549285888672,
"MemcpyElimination": 0.025969266891479492,
"MutateDataType": 0.002087831497192383,
"NeuronAliasDependencyInduction": 0.00016880035400390625,
"NeuronAliasDependencyReset": 0.020352602005004883,
"NeuronInstComb": 0.004126310348510742,
"NeuronLICM": 0.0351865291595459,
"NeuronLoopFusion": 0.007991313934326172,
"NeuronLoopInterchange": 0.002409219741821289,
"NeuronSimplifier": 0.007069587707519531,
"NeuronSimplifyPredicates": 0.12138772010803223,
"NeuronValueNumbering": 0.0028395652770996094,
"OptimizeAliasedCopyChain": 0.0005936622619628906,
"OptimizeNKIKernels": 0.5374257564544678,
"PAGLayoutOpt": 0.08115577697753906,
"PComputeCutting": 0.004801273345947266,
"PGLayoutTilingPipeline": 0.5454635620117188,
"PGTiling": 0.14933419227600098,
"PadElimination": 0.00034046173095703125,
"ParAxesAnnotation": 0.053552865982055664,
"PartialLoopFusion": 0.0067539215087890625,
"PartialSimdFusion": 0.00693058967590332,
"PerfectLoopNest": 0.0035321712493896484,
"RecognizeOpIdiom": 0.003947257995605469,
"Recompute": 0.00024962425231933594,
"RelaxPredicates": 0.013285398483276367,
"Rematerialization": 0.002062082290649414,
"ReshapeWeights": 0.002131223678588867,
"ResolveAccessConflict": 0.0038597583770751953,
"ResolveComplicatePredicates": 0.002032756805419922,
"RewriteReplicationMatmul": 0.001924753189086914,
"RewriteWeights": 0.002452373504638672,
"SFKVectorizer": 0.2690722942352295,
"SimpleAllReduceTiling": 0.008755922317504883,
"Simplifier": 0.004038810729980469,
"SimplifyMacroPredicates": 0.010622739791870117,
"SimplifyNeuronTensor": 1.059011697769165,
"SimplifySlice": 0.0009577274322509766,
"SimplifyTensor": 0.005341768264770508,
"SpillPSum": 0.011537313461303711,
"SplitAPUnionSets": 0.10771751403808594,
"SplitAccGrp": 0.002201557159423828,
"StaticProfiler": 0.012447118759155273,
"StaticTransposeLocalTensor": 0.0038712024688720703,
"SundaISel": 0.04214668273925781,
"TCTransform": 0.0008432865142822266,
"TensorInitialization": 0.012825727462768555,
"TensorOpSimplifier": 0.004651308059692383,
"TensorOpTransform": 0.019537687301635742,
"TileCCOps": 0.006766319274902344,
"TilingProfiler": 0.006911277770996094,
"TransformConvOp": 0.0030303001403808594,
"TritiumFusion": 0.04502224922180176,
"ValueNumbering": 0.001996755599975586,
"VectorizeDMA": 0.0019402503967285156,
"VectorizeMatMult": 0.0027413368225097656,
"WeightCoalescing": 0.00829005241394043,
"ZeroSizeTensorElimination": 0.00013709068298339844
},
"tensorizer": {
"DMATilingProfiler::TotalInstructionsAfterTiling": 44382.0,
"StaticProfiler::AifUb": 205.154296875,
"StaticProfiler::ArithmeticIntensityTensorizer": 201.6046905517578,
"StaticProfiler::AverageDmaLength": 1901.806396484375,
"StaticProfiler::AverageFractalPeUtilization": 99.66542053222656,
"StaticProfiler::AveragePartitionUtilization": 97.7269515991211,
"StaticProfiler::AveragePeUtilization": 98.64861297607422,
"StaticProfiler::DDRTransferBytes": 795531072.0,
"StaticProfiler::InternalTransferBytes": 646388224.0,
"StaticProfiler::LoadExpanded": 376342.0,
"StaticProfiler::LocalizationEfficiency": 98.26979064941406,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 101.01405334472656,
"StaticProfiler::StoreExpanded": 4189.0,
"StaticProfiler::TotalDMAExpanded": 380531.0,
"StaticProfiler::TotalDynamicInstancesCount": 53882.0,
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 53436.0,
"StaticProfiler::TotalLNCComm": 0.0,
"StaticProfiler::TotalLNCCommTransfer": 0.0,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
"TilingProfiler::GenericInstructionsAfterTiling": 4.0,
"TilingProfiler::MatMultInstructionsAfterTiling": 23616.0,
"TilingProfiler::NumPfTransposes": 5.0,
"TilingProfiler::NumPfTransposesForIo": 1.0,
"TilingProfiler::NumPfTransposesForLocal": 1.0,
"TilingProfiler::NumPfTransposesForNonlocal": 3.0,
"TilingProfiler::PfTransposeInstructions": 19393.0,
"TilingProfiler::PfTransposeInstructionsForIo": 19008.0,
"TilingProfiler::PfTransposeInstructionsForLocal": 1.0,
"TilingProfiler::PfTransposeInstructionsForNonlocal": 384.0,
"TilingProfiler::ReduceInstructionsAfterTiling": 4.0,
"TilingProfiler::SimdInstructionsAfterTiling": 158.0,
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
"TransformConvOp::conv2d_column_packing": 0.0,
"TransformConvOp::conv2d_column_packing_1": 0.0,
"TransformConvOp::conv2d_column_packing_io10": 0.0,
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
}
},
"sg01": {
"compiletime": {
"CanonicalizeConv": 1.2000000424450263e-05,
"CanonicalizeForTensorizer": 1.2999999853491317e-05,
"Canonicalizer": 0.0002500000118743628,
"HoistCompute": 3.000000106112566e-06,
"IdentifyCrossPassTensors": 2.300000051036477e-05,
"MemcastMotion": 1.1000000085914508e-05,
"PenguinizeFunctions": 1.4000000192027073e-05,
"PruneFunctions": 3.099999958067201e-05,
"RemoveOptimizationBarriers": 2.2000000171829015e-05,
"ScatterMotion": 2.9999999242136255e-05,
"TensorizerLegalizationPass": 1.700000029813964e-05,
"VerifySupportedOps": 9.000000318337698e-06,
"algsimp": 6.299999949987978e-05,
"batchnorm_expander": 1.2999999853491317e-05,
"boundary-marker-removal": 4.999999873689376e-06,
"call-inliner": 9.000000318337698e-06,
"canonicalize-boundary-marker": 6.000000212225132e-06,
"collective-stream-id-checker": 3.000000106112566e-06,
"comparison-expander": 4.999999873689376e-06,
"computation-deduplicator": 1.8000000636675395e-05,
"conditional-to-select": 4.999999873689376e-06,
"config-lowering": 2.5999999706982635e-05,
"constant_folding": 7.999999979801942e-06,
"cse": 1.2000000424450263e-05,
"dce": 9.999999974752427e-07,
"dynamic-slice-transpose": 3.999999989900971e-06,
"eliminate-redundant-compare": 3.999999989900971e-06,
"emit-offloaded-dropout": 1.2999999853491317e-05,
"flatten-call-graph": 7.999999979801942e-06,
"fuse-send-recv": 2.099999983329326e-05,
"hilo::LegalizeAlias": 4.999999873689376e-06,
"hilo::NeuronInstCombine": 4.5000000682193786e-05,
"hilo::NeuronOpFusion": 1.700000029813964e-05,
"hilo::ReplaceTokenTypeWithU8Pass": 2.099999983329326e-05,
"hilo::ScheduleFusion": 9.999999974752427e-07,
"hilo::SixtyFourHack": 1.2999999853491317e-05,
"hilo::VerifyAliasing": 1.9999999949504854e-06,
"hlo-mac-count": 2.9999999242136255e-05,
"hlo-verifier": 0.00018000000272877514,
"legalize-ccops": 9.999999974752427e-07,
"legalize-compare": 3.999999989900971e-06,
"lower-argminmax-custom-call": 3.999999989900971e-06,
"map-inline": 1.2000000424450263e-05,
"metadata-naming": 1.8000000636675395e-05,
"mlir::detail::OpToOpPassAdaptor": 9.999999747378752e-05,
"mlir::hlo::MhloToPyPenguin": 0.0009420000133104622,
"mlir::mhlo::LowerComplexExtraPass": 7.999999797903001e-05,
"mlir::mhlo::LowerComplexPass": 0.00015799999528098851,
"native-to-custom-softmax": 6.000000212225132e-06,
"native-to-custom-softmax-dx": 1.2999999853491317e-05,
"operand_upcaster": 1.4999999621068127e-05,
"post-par-pipe-begin": 1.9999999949504854e-06,
"post-par-pipe-end": 0.0,
"post-partition-simplification": 0.0005130000063218176,
"replace-minimum-constant": 6.000000212225132e-06,
"reshape-mover": 3.000000106112566e-06,
"simplify-concat": 4.8999998398358e-05,
"simplify-while-loops": 1.9999999949504854e-06,
"transform-variadic-reduce": 9.000000318337698e-06,
"tuple-simplifier": 4.999999873689376e-06,
"unpack-nested-aws-ntwsr": 3.999999989900971e-06,
"unroll-while-loop": 0.0
},
"hilo": {
"ArithmeticIntensity": 457.20416259765625,
"HloMacCount": 50465865728.0,
"Traffic": 220758560.0
}
},
"sg02": {
"compiletime": {
"CanonicalizeConv": 0.0002589999930933118,
"CanonicalizeForTensorizer": 1.2000000424450263e-05,
"Canonicalizer": 0.0003060000017285347,
"HoistCompute": 1.9999999949504854e-06,
"IdentifyCrossPassTensors": 2.4000000848900527e-05,
"MemcastMotion": 1.2999999853491317e-05,
"PenguinizeFunctions": 9.000000318337698e-06,
"PruneFunctions": 7.999999979801942e-06,
"RemoveOptimizationBarriers": 2.099999983329326e-05,
"ScatterMotion": 1.9999999949504854e-06,
"TensorizerLegalizationPass": 4.999999873689376e-06,
"VerifySupportedOps": 1.1000000085914508e-05,
"algsimp": 5.900000178371556e-05,
"batchnorm_expander": 1.2000000424450263e-05,
"boundary-marker-removal": 3.999999989900971e-06,
"call-inliner": 1.1000000085914508e-05,
"canonicalize-boundary-marker": 4.999999873689376e-06,
"collective-stream-id-checker": 3.000000106112566e-06,
"comparison-expander": 4.999999873689376e-06,
"computation-deduplicator": 4.099999932805076e-05,
"conditional-to-select": 6.000000212225132e-06,
"config-lowering": 2.5999999706982635e-05,
"constant_folding": 9.000000318337698e-06,
"cse": 1.1000000085914508e-05,
"dce": 9.999999974752427e-07,
"dynamic-slice-transpose": 3.999999989900971e-06,
"eliminate-redundant-compare": 3.999999989900971e-06,
"emit-offloaded-dropout": 1.2999999853491317e-05,
"flatten-call-graph": 1.1000000085914508e-05,
"fuse-send-recv": 1.8000000636675395e-05,
"hilo::LegalizeAlias": 1.9999999949504854e-06,
"hilo::NeuronInstCombine": 5.8000001445179805e-05,
"hilo::NeuronOpFusion": 1.9999999949504854e-06,
"hilo::ReplaceTokenTypeWithU8Pass": 9.000000318337698e-06,
"hilo::ScheduleFusion": 3.999999989900971e-06,
"hilo::SixtyFourHack": 3.899999865097925e-05,
"hilo::VerifyAliasing": 9.999999974752427e-07,
"hlo-mac-count": 0.00016799999866634607,
"hlo-verifier": 0.00015900000289548188,
"legalize-ccops": 9.999999974752427e-07,
"legalize-compare": 3.000000106112566e-06,
"lower-argminmax-custom-call": 3.000000106112566e-06,
"map-inline": 1.2000000424450263e-05,
"metadata-naming": 1.700000029813964e-05,
"mlir::detail::OpToOpPassAdaptor": 2.2000000171829015e-05,
"mlir::hlo::MhloToPyPenguin": 0.0008549999911338091,
"mlir::mhlo::LowerComplexExtraPass": 8.800000068731606e-05,
"mlir::mhlo::LowerComplexPass": 0.00015999999595806003,
"native-to-custom-softmax": 4.999999873689376e-06,
"native-to-custom-softmax-dx": 1.9999999494757503e-05,
"operand_upcaster": 1.2000000424450263e-05,
"post-par-pipe-begin": 1.9999999949504854e-06,
"post-par-pipe-end": 0.0,
"post-partition-simplification": 0.0005009999731555581,
"replace-minimum-constant": 9.000000318337698e-06,
"reshape-mover": 3.000000106112566e-06,
"simplify-concat": 4.3000000005122274e-05,
"simplify-while-loops": 1.9999999949504854e-06,
"transform-variadic-reduce": 4.8000001697801054e-05,
"tuple-simplifier": 4.999999873689376e-06,
"unpack-nested-aws-ntwsr": 3.999999989900971e-06,
"unroll-while-loop": 0.0
},
"hilo": {
"ArithmeticIntensity": 99.1578140258789,
"HloMacCount": 38965870592.0,
"Traffic": 785936448.0
}
}
}