| { |
| "Average": { |
| "tensorizer": { |
| "StaticProfiler::AverageFractalPeUtilization": 99.66542053222656, |
| "StaticProfiler::AveragePartitionUtilization": 97.7269515991211, |
| "StaticProfiler::AveragePeUtilization": 98.64861297607422, |
| "StaticProfiler::LocalizationEfficiency": 98.26979064941406, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 101.01405334472656, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0 |
| } |
| }, |
| "Count": { |
| "tensorizer": { |
| "StaticProfiler::AverageFractalPeUtilization": 1.0, |
| "StaticProfiler::AveragePartitionUtilization": 1.0, |
| "StaticProfiler::AveragePeUtilization": 1.0, |
| "StaticProfiler::LocalizationEfficiency": 1.0, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1.0, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 1.0, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 1.0 |
| } |
| }, |
| "Sum": { |
| "compiletime": { |
| "AGOrderingAnalysisPass": 0.018257856369018555, |
| "AffinePredicateResolution": 0.0011677742004394531, |
| "AliasDependencyElimination": 0.0001201629638671875, |
| "AliasDependencyInduction": 0.0052988529205322266, |
| "AliasDependencyReset": 0.029210567474365234, |
| "BFComputeCutting": 0.0032625198364257813, |
| "BirCodeGenLoop": 0.4527714252471924, |
| "CCOpFusion": 0.02410125732421875, |
| "CanonicalizeConv": 0.00029399999766610563, |
| "CanonicalizeDAGForPGTiling": 0.004324913024902344, |
| "CanonicalizeForTensorizer": 4.8000001697801054e-05, |
| "CanonicalizeIR": 0.0019502639770507813, |
| "Canonicalizer": 0.0010809999657794833, |
| "CoalesceCCOp": 0.014672040939331055, |
| "CommuteConcat": 0.0008339881896972656, |
| "DMALocalityOpt": 0.005767107009887695, |
| "DMAProfiler": 0.012850046157836914, |
| "DMATilingProfiler": 0.004332065582275391, |
| "DataLocalityOpt": 0.07260942459106445, |
| "DataStreaming": 0.03969836235046387, |
| "DeConcat": 0.0005326271057128906, |
| "DeadCodeElimination": 0.0009255409240722656, |
| "DeadStoreElimination": 0.0055675506591796875, |
| "DelinearIndices": 0.004735231399536133, |
| "Delinearization": 0.0030374526977539063, |
| "DoNothing": 0.00018930435180664063, |
| "DramToDramTranspose": 0.018135547637939453, |
| "DumpGraphAndMetadata": 0.09476375579833984, |
| "EliminateDivs": 0.002595663070678711, |
| "ExpandBatchNorm": 0.002063274383544922, |
| "ExpandISAMacro": 0.011973381042480469, |
| "FactorizeBlkDims": 0.009292840957641602, |
| "FactorizeThreadAxesInFreeDims": 0.0010046958923339844, |
| "FlattenMacroLoop": 0.002232074737548828, |
| "GenericAccessSimplifier": 0.0018167495727539063, |
| "HoistCompute": 7.999999979801942e-06, |
| "IdentifyCrossPassTensors": 7.79999973019585e-05, |
| "InferInitValue": 0.024865150451660156, |
| "InferIntrinsicOnCC": 0.009101152420043945, |
| "InferNeuronTensor": 0.023293495178222656, |
| "InferNonlocalTensors": 0.01632833480834961, |
| "InferPSumTensor": 0.27726316452026367, |
| "InlineNativeKernels": 0.0081634521484375, |
| "InsertIOTransposes": 0.019203901290893555, |
| "InsertLocalTransposes": 0.0042340755462646484, |
| "InsertOffloadedTransposes": 0.002811431884765625, |
| "LICM": 0.0029730796813964844, |
| "LateLegalizeInst": 0.014307022094726563, |
| "LateLegalizePostSplit": 0.012536048889160156, |
| "LateLowerReshapeOp": 0.0018641948699951172, |
| "LateLowerTensorOp": 0.0014081001281738281, |
| "LateNeuronInstComb": 0.00915217399597168, |
| "LayoutPreprocessing": 0.02658390998840332, |
| "LayoutPreprocessingAndAnalysis": 0.10707235336303711, |
| "LayoutRequirementAnalysis": 0.005135536193847656, |
| "LegalizeCCOpLayout": 0.002307415008544922, |
| "LegalizeOpLevelAlias": 0.0012297630310058594, |
| "LegalizePartitionReduce": 0.0010194778442382813, |
| "LegalizeSundaAccess": 0.07808256149291992, |
| "LegalizeSundaMacro": 0.010968446731567383, |
| "LegalizeType": 0.012074947357177734, |
| "LocalLayoutOpt": 0.013799905776977539, |
| "LoopFusion": 0.0052182674407958984, |
| "LoopSplitting": 0.0003161430358886719, |
| "LowerBroadcast": 0.0015821456909179688, |
| "LowerCCOpBlockAxis": 0.0040547847747802734, |
| "LowerComplexBroadcast": 0.002165079116821289, |
| "LowerIntrinsics": 0.31156492233276367, |
| "LowerTensorOp": 0.010558843612670898, |
| "LowerTranspose": 0.012494325637817383, |
| "MacroGeneration": 0.029862642288208008, |
| "MaskPropagation": 0.002757549285888672, |
| "MemcastMotion": 3.400000059627928e-05, |
| "MemcpyElimination": 0.025969266891479492, |
| "MutateDataType": 0.002087831497192383, |
| "NeuronAliasDependencyInduction": 0.00016880035400390625, |
| "NeuronAliasDependencyReset": 0.020352602005004883, |
| "NeuronInstComb": 0.004656076431274414, |
| "NeuronLICM": 0.03560137748718262, |
| "NeuronLoopFusion": 0.007991313934326172, |
| "NeuronLoopInterchange": 0.002409219741821289, |
| "NeuronSimplifier": 0.007069587707519531, |
| "NeuronSimplifyPredicates": 0.12419009208679199, |
| "NeuronValueNumbering": 0.0032753944396972656, |
| "OptimizeAliasedCopyChain": 0.0005936622619628906, |
| "OptimizeNKIKernels": 0.5374257564544678, |
| "PAGLayoutOpt": 0.08115577697753906, |
| "PComputeCutting": 0.004801273345947266, |
| "PGLayoutTilingPipeline": 0.5454635620117188, |
| "PGTiling": 0.14933419227600098, |
| "PadElimination": 0.00034046173095703125, |
| "ParAxesAnnotation": 0.053552865982055664, |
| "PartialLoopFusion": 0.0067539215087890625, |
| "PartialSimdFusion": 0.00693058967590332, |
| "PenguinizeFunctions": 4.5000000682193786e-05, |
| "PerfectLoopNest": 0.0035321712493896484, |
| "PruneFunctions": 5.199999941396527e-05, |
| "RecognizeOpIdiom": 0.003947257995605469, |
| "Recompute": 0.00024962425231933594, |
| "RelaxPredicates": 0.013285398483276367, |
| "Rematerialization": 0.002062082290649414, |
| "RemoveOptimizationBarriers": 8.70000003487803e-05, |
| "ReshapeWeights": 0.002131223678588867, |
| "ResolveAccessConflict": 0.0038597583770751953, |
| "ResolveComplicatePredicates": 0.002032756805419922, |
| "RewriteReplicationMatmul": 0.001924753189086914, |
| "RewriteWeights": 0.002452373504638672, |
| "SFKVectorizer": 0.2718319892883301, |
| "ScatterMotion": 3.7999998312443495e-05, |
| "SimpleAllReduceTiling": 0.008960247039794922, |
| "Simplifier": 0.004038810729980469, |
| "SimplifyMacroPredicates": 0.010622739791870117, |
| "SimplifyNeuronTensor": 1.0594146251678467, |
| "SimplifySlice": 0.0009577274322509766, |
| "SimplifyTensor": 0.005341768264770508, |
| "SpillPSum": 0.012076139450073242, |
| "SplitAPUnionSets": 0.10771751403808594, |
| "SplitAccGrp": 0.002201557159423828, |
| "StaticProfiler": 0.012447118759155273, |
| "StaticTransposeLocalTensor": 0.0038712024688720703, |
| "SundaISel": 0.04214668273925781, |
| "TCTransform": 0.0008432865142822266, |
| "TensorInitialization": 0.012825727462768555, |
| "TensorOpSimplifier": 0.004651308059692383, |
| "TensorOpTransform": 0.019537687301635742, |
| "TensorizerLegalizationPass": 5.7999997807201e-05, |
| "TileCCOps": 0.006766319274902344, |
| "TilingProfiler": 0.006911277770996094, |
| "TransformConvOp": 0.0030303001403808594, |
| "TritiumFusion": 0.04502224922180176, |
| "ValueNumbering": 0.001996755599975586, |
| "VectorizeDMA": 0.0019402503967285156, |
| "VectorizeMatMult": 0.0027413368225097656, |
| "VerifySupportedOps": 3.7000001611886546e-05, |
| "WeightCoalescing": 0.008520841598510742, |
| "ZeroSizeTensorElimination": 0.00013709068298339844, |
| "algsimp": 0.0026940000243484974, |
| "batchnorm_expander": 4.400000034365803e-05, |
| "boundary-marker-removal": 1.5999999959603883e-05, |
| "call-inliner": 0.00046999999904073775, |
| "canonicalize-boundary-marker": 1.8999999156221747e-05, |
| "collective-stream-id-checker": 7.300000288523734e-05, |
| "comparison-expander": 0.0005740000051446259, |
| "computation-deduplicator": 7.999999797903001e-05, |
| "conditional-to-select": 1.8000000636675395e-05, |
| "config-lowering": 0.0003279999946244061, |
| "constant-statistics": 0.0005329999839887023, |
| "constant_folding": 0.0003260000084992498, |
| "cse": 4.5000000682193786e-05, |
| "dce": 8.399999933317304e-05, |
| "dot_decomposer": 0.0013409999664872885, |
| "dynamic-slice-transpose": 1.3999999282532372e-05, |
| "eliminate-redundant-compare": 0.0002959999837912619, |
| "emit-offloaded-dropout": 6.399999983841553e-05, |
| "flatten-call-graph": 0.0009319999953731894, |
| "fuse-send-recv": 6.999999459367245e-05, |
| "hilo::LegalizeAlias": 1.3999999282532372e-05, |
| "hilo::NeuronInstCombine": 0.0001660000125411898, |
| "hilo::NeuronOpFusion": 2.5000001187436283e-05, |
| "hilo::ReplaceTokenTypeWithU8Pass": 5.2999999752501026e-05, |
| "hilo::ScheduleFusion": 7.000000096013537e-06, |
| "hilo::SixtyFourHack": 7.299999560927972e-05, |
| "hilo::VerifyAliasing": 6.000000212225132e-06, |
| "hlo-mac-count": 0.0013429999817162752, |
| "hlo-verifier": 0.007542999926954508, |
| "instruction-histogram": 0.0006709999870508909, |
| "io-con-pipe-begin": 4.999999873689376e-06, |
| "io-con-pipe-end": 9.999999974752427e-07, |
| "io-layout-normalization": 0.001310999970883131, |
| "io-statistics": 8.499999967170879e-05, |
| "legalize-ccops": 3.999999989900971e-06, |
| "legalize-compare": 1.2999999853491317e-05, |
| "lower-argminmax-custom-call": 1.300000076298602e-05, |
| "map-inline": 0.0008850000449456275, |
| "metadata-naming": 5.999999848427251e-05, |
| "mlir::detail::OpToOpPassAdaptor": 0.00014399999054148793, |
| "mlir::hlo::MhloToPyPenguin": 0.004429999738931656, |
| "mlir::mhlo::LowerComplexExtraPass": 0.00027299998328089714, |
| "mlir::mhlo::LowerComplexPass": 0.0004909999552182853, |
| "native-to-custom-softmax": 0.0007070000283420086, |
| "native-to-custom-softmax-dx": 0.0005990000208839774, |
| "operand_upcaster": 4.900000203633681e-05, |
| "opt-barrier-removal": 0.0005510000046342611, |
| "post-par-pipe-begin": 8.999999408842996e-06, |
| "post-par-pipe-end": 0.0, |
| "post-partition-simplification": 0.0018570000538602471, |
| "pre-par-pipe-begin": 9.999999974752427e-07, |
| "pre-par-pipe-end": 0.0, |
| "pre-partition-simplification": 0.12893199920654297, |
| "replace-minimum-constant": 0.0004569999873638153, |
| "reshape-mover": 0.00012599999899975955, |
| "simplify-concat": 0.00015899998834356666, |
| "simplify-while-loops": 0.00010400000610388815, |
| "transform-variadic-reduce": 7.000000186963007e-05, |
| "tuple-simplifier": 0.0003150000120513141, |
| "unpack-nested-aws-ntwsr": 0.0004349999944679439, |
| "unroll-while-loop": 2.099999983329326e-05, |
| "zero_sized_hlo_elimination": 0.0008670000243000686 |
| }, |
| "hilo": { |
| "ConstantSize": 1189157.0, |
| "HloInputCount": 475.0, |
| "HloMacCount": 101242896384.0, |
| "HloOutputCount": 73.0, |
| "IfmapSize": 8266545152.0, |
| "OfmapSize": 75497472.0, |
| "OutputsReadFromCount": 0.0, |
| "PassthroughTensorsCount": 0.0, |
| "RedundantOutputCount": 0.0, |
| "Traffic": 1692493184.0 |
| }, |
| "tensorizer": { |
| "DMATilingProfiler::TotalInstructionsAfterTiling": 44382.0, |
| "StaticProfiler::AifUb": 205.154296875, |
| "StaticProfiler::ArithmeticIntensityTensorizer": 201.6046905517578, |
| "StaticProfiler::AverageDmaLength": 1901.806396484375, |
| "StaticProfiler::DDRTransferBytes": 795531072.0, |
| "StaticProfiler::InternalTransferBytes": 646388224.0, |
| "StaticProfiler::LoadExpanded": 376342.0, |
| "StaticProfiler::StoreExpanded": 4189.0, |
| "StaticProfiler::TotalDMAExpanded": 380531.0, |
| "StaticProfiler::TotalDynamicInstancesCount": 53882.0, |
| "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 53436.0, |
| "StaticProfiler::TotalLNCComm": 0.0, |
| "StaticProfiler::TotalLNCCommTransfer": 0.0, |
| "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0, |
| "TilingProfiler::DmaInstructionsAfterTiling": 0.0, |
| "TilingProfiler::GenericInstructionsAfterTiling": 4.0, |
| "TilingProfiler::MatMultInstructionsAfterTiling": 23616.0, |
| "TilingProfiler::NumPfTransposes": 5.0, |
| "TilingProfiler::NumPfTransposesForIo": 1.0, |
| "TilingProfiler::NumPfTransposesForLocal": 1.0, |
| "TilingProfiler::NumPfTransposesForNonlocal": 3.0, |
| "TilingProfiler::PfTransposeInstructions": 19393.0, |
| "TilingProfiler::PfTransposeInstructionsForIo": 19008.0, |
| "TilingProfiler::PfTransposeInstructionsForLocal": 1.0, |
| "TilingProfiler::PfTransposeInstructionsForNonlocal": 384.0, |
| "TilingProfiler::ReduceInstructionsAfterTiling": 4.0, |
| "TilingProfiler::SimdInstructionsAfterTiling": 158.0, |
| "TilingProfiler::TotalInstructionsAfterTiling": 0.0, |
| "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0, |
| "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0, |
| "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0, |
| "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0, |
| "TransformConvOp::conv2d_column_packing": 0.0, |
| "TransformConvOp::conv2d_column_packing_1": 0.0, |
| "TransformConvOp::conv2d_column_packing_io10": 0.0, |
| "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0 |
| } |
| }, |
| "all": { |
| "compiletime": { |
| "algsimp": 0.002466999925673008, |
| "call-inliner": 0.0004360000020824373, |
| "collective-stream-id-checker": 6.299999949987978e-05, |
| "comparison-expander": 0.0005569999921135604, |
| "constant-statistics": 0.0005329999839887023, |
| "constant_folding": 0.0002969999914057553, |
| "dce": 7.999999797903001e-05, |
| "dot_decomposer": 0.0013409999664872885, |
| "eliminate-redundant-compare": 0.00028199999360367656, |
| "flatten-call-graph": 0.0008999999845400453, |
| "hlo-mac-count": 0.0010720000136643648, |
| "hlo-verifier": 0.0069679999724030495, |
| "instruction-histogram": 0.0006709999870508909, |
| "io-con-pipe-begin": 4.999999873689376e-06, |
| "io-con-pipe-end": 9.999999974752427e-07, |
| "io-layout-normalization": 0.001310999970883131, |
| "io-statistics": 8.499999967170879e-05, |
| "map-inline": 0.0008440000237897038, |
| "native-to-custom-softmax": 0.0006750000175088644, |
| "native-to-custom-softmax-dx": 0.0005000000237487257, |
| "opt-barrier-removal": 0.0005510000046342611, |
| "pre-par-pipe-begin": 9.999999974752427e-07, |
| "pre-par-pipe-end": 0.0, |
| "pre-partition-simplification": 0.12893199920654297, |
| "replace-minimum-constant": 0.0004309999931138009, |
| "reshape-mover": 0.00011500000255182385, |
| "simplify-while-loops": 9.600000339560211e-05, |
| "tuple-simplifier": 0.0002969999914057553, |
| "unpack-nested-aws-ntwsr": 0.00042100000428035855, |
| "unroll-while-loop": 1.9999999494757503e-05, |
| "zero_sized_hlo_elimination": 0.0008670000243000686 |
| } |
| }, |
| "cumsum": { |
| "compiletime": { |
| "CoalesceCCOp": 0.00020885467529296875, |
| "DMALocalityOpt": 0.00016832351684570313, |
| "DMAProfiler": 0.0007588863372802734, |
| "DataStreaming": 0.00029587745666503906, |
| "DoNothing": 0.00011897087097167969, |
| "ExpandISAMacro": 0.0005011558532714844, |
| "FactorizeBlkDims": 0.00043463706970214844, |
| "InferPSumTensor": 0.00044608116149902344, |
| "LateLegalizeInst": 0.0004031658172607422, |
| "LateNeuronInstComb": 0.0005033016204833984, |
| "LegalizeSundaAccess": 0.0021431446075439453, |
| "LegalizeType": 0.00024056434631347656, |
| "LowerBroadcast": 0.00022101402282714844, |
| "LowerIntrinsics": 0.00023508071899414063, |
| "LowerTranspose": 0.0002219676971435547, |
| "NeuronInstComb": 0.0005297660827636719, |
| "NeuronLICM": 0.00041484832763671875, |
| "NeuronSimplifyPredicates": 0.0028023719787597656, |
| "NeuronValueNumbering": 0.00043582916259765625, |
| "SFKVectorizer": 0.002759695053100586, |
| "SimpleAllReduceTiling": 0.00020432472229003906, |
| "SimplifyNeuronTensor": 0.0004029273986816406, |
| "SpillPSum": 0.0005388259887695313, |
| "WeightCoalescing": 0.0002307891845703125 |
| } |
| }, |
| "sg00": { |
| "compiletime": { |
| "CanonicalizeConv": 2.300000051036477e-05, |
| "CanonicalizeForTensorizer": 2.300000051036477e-05, |
| "Canonicalizer": 0.0005249999812804163, |
| "HoistCompute": 3.000000106112566e-06, |
| "IdentifyCrossPassTensors": 3.099999958067201e-05, |
| "MemcastMotion": 9.999999747378752e-06, |
| "PenguinizeFunctions": 2.2000000171829015e-05, |
| "PruneFunctions": 1.2999999853491317e-05, |
| "RemoveOptimizationBarriers": 4.400000034365803e-05, |
| "ScatterMotion": 6.000000212225132e-06, |
| "TensorizerLegalizationPass": 3.600000127335079e-05, |
| "VerifySupportedOps": 1.700000029813964e-05, |
| "algsimp": 0.0001049999991664663, |
| "batchnorm_expander": 1.8999999156221747e-05, |
| "boundary-marker-removal": 7.000000096013537e-06, |
| "call-inliner": 1.4000000192027073e-05, |
| "canonicalize-boundary-marker": 7.999999979801942e-06, |
| "collective-stream-id-checker": 3.999999989900971e-06, |
| "comparison-expander": 7.000000096013537e-06, |
| "computation-deduplicator": 2.099999983329326e-05, |
| "conditional-to-select": 7.000000096013537e-06, |
| "config-lowering": 0.00027600000612437725, |
| "constant_folding": 1.2000000424450263e-05, |
| "cse": 2.2000000171829015e-05, |
| "dce": 1.9999999949504854e-06, |
| "dynamic-slice-transpose": 6.000000212225132e-06, |
| "eliminate-redundant-compare": 6.000000212225132e-06, |
| "emit-offloaded-dropout": 3.7999998312443495e-05, |
| "flatten-call-graph": 1.2999999853491317e-05, |
| "fuse-send-recv": 3.099999958067201e-05, |
| "hilo::LegalizeAlias": 7.000000096013537e-06, |
| "hilo::NeuronInstCombine": 6.299999949987978e-05, |
| "hilo::NeuronOpFusion": 6.000000212225132e-06, |
| "hilo::ReplaceTokenTypeWithU8Pass": 2.300000051036477e-05, |
| "hilo::ScheduleFusion": 1.9999999949504854e-06, |
| "hilo::SixtyFourHack": 2.099999983329326e-05, |
| "hilo::VerifyAliasing": 3.000000106112566e-06, |
| "hlo-mac-count": 7.300000288523734e-05, |
| "hlo-verifier": 0.00023600000713486224, |
| "legalize-ccops": 1.9999999949504854e-06, |
| "legalize-compare": 6.000000212225132e-06, |
| "lower-argminmax-custom-call": 6.000000212225132e-06, |
| "map-inline": 1.700000029813964e-05, |
| "metadata-naming": 2.499999936844688e-05, |
| "mlir::detail::OpToOpPassAdaptor": 2.2000000171829015e-05, |
| "mlir::hlo::MhloToPyPenguin": 0.002633000025525689, |
| "mlir::mhlo::LowerComplexExtraPass": 0.0001049999991664663, |
| "mlir::mhlo::LowerComplexPass": 0.00017299999308306724, |
| "native-to-custom-softmax": 2.099999983329326e-05, |
| "native-to-custom-softmax-dx": 6.600000051548705e-05, |
| "operand_upcaster": 2.2000000171829015e-05, |
| "post-par-pipe-begin": 4.999999873689376e-06, |
| "post-par-pipe-end": 0.0, |
| "post-partition-simplification": 0.0008430000161752105, |
| "replace-minimum-constant": 1.1000000085914508e-05, |
| "reshape-mover": 4.999999873689376e-06, |
| "simplify-concat": 6.70000008540228e-05, |
| "simplify-while-loops": 3.999999989900971e-06, |
| "transform-variadic-reduce": 1.2999999853491317e-05, |
| "tuple-simplifier": 7.999999979801942e-06, |
| "unpack-nested-aws-ntwsr": 6.000000212225132e-06, |
| "unroll-while-loop": 9.999999974752427e-07 |
| }, |
| "hilo": { |
| "ArithmeticIntensity": 34.445003509521484, |
| "ConstantSize": 1189157.0, |
| "HloInputCount": 475.0, |
| "HloMacCount": 11811160064.0, |
| "HloOutputCount": 73.0, |
| "IfmapSize": 8266545152.0, |
| "OfmapSize": 75497472.0, |
| "OutputsReadFromCount": 0.0, |
| "PassthroughTensorsCount": 0.0, |
| "RedundantOutputCount": 0.0, |
| "Traffic": 685798208.0 |
| } |
| }, |
| "sg0000": { |
| "compiletime": { |
| "AGOrderingAnalysisPass": 0.07801461219787598, |
| "AffinePredicateResolution": 0.0017647743225097656, |
| "AliasDependencyElimination": 0.0001277923583984375, |
| "AliasDependencyInduction": 0.00855708122253418, |
| "AliasDependencyReset": 0.08457040786743164, |
| "BFComputeCutting": 0.003294229507446289, |
| "BirCodeGenLoop": 0.05274701118469238, |
| "CCOpFusion": 0.030017614364624023, |
| "CanonicalizeDAGForPGTiling": 0.003341197967529297, |
| "CanonicalizeIR": 0.0022792816162109375, |
| "CoalesceCCOp": 0.0053555965423583984, |
| "CommuteConcat": 0.0023560523986816406, |
| "DMALocalityOpt": 0.0013885498046875, |
| "DMAProfiler": 0.00625157356262207, |
| "DMATilingProfiler": 0.003763914108276367, |
| "DataLocalityOpt": 0.09786868095397949, |
| "DataStreaming": 0.004992246627807617, |
| "DeConcat": 0.002264261245727539, |
| "DeadCodeElimination": 0.002042531967163086, |
| "DeadStoreElimination": 0.030755043029785156, |
| "DelinearIndices": 0.009100914001464844, |
| "Delinearization": 0.004424571990966797, |
| "DoNothing": 6.914138793945313e-05, |
| "DramToDramTranspose": 0.03130936622619629, |
| "DumpGraphAndMetadata": 0.005283832550048828, |
| "EliminateDivs": 0.0042150020599365234, |
| "ExpandBatchNorm": 0.0019366741180419922, |
| "ExpandISAMacro": 0.002724170684814453, |
| "FactorizeBlkDims": 0.011873722076416016, |
| "FactorizeThreadAxesInFreeDims": 0.002283811569213867, |
| "FlattenMacroLoop": 0.0031974315643310547, |
| "GenericAccessSimplifier": 0.002216339111328125, |
| "InferInitValue": 0.030458927154541016, |
| "InferIntrinsicOnCC": 0.011402368545532227, |
| "InferNeuronTensor": 0.04513859748840332, |
| "InferNonlocalTensors": 0.10613727569580078, |
| "InferPSumTensor": 0.037427663803100586, |
| "InlineNativeKernels": 0.00368499755859375, |
| "InsertIOTransposes": 0.012629508972167969, |
| "InsertLocalTransposes": 0.007400989532470703, |
| "InsertOffloadedTransposes": 0.0025758743286132813, |
| "LICM": 0.0031554698944091797, |
| "LateLegalizeInst": 0.005858182907104492, |
| "LateLegalizePostSplit": 0.0029172897338867188, |
| "LateLowerReshapeOp": 0.0018696784973144531, |
| "LateLowerTensorOp": 0.004997968673706055, |
| "LateNeuronInstComb": 0.019808530807495117, |
| "LayoutPreprocessing": 0.04119300842285156, |
| "LayoutPreprocessingAndAnalysis": 0.10642147064208984, |
| "LayoutRequirementAnalysis": 0.0070705413818359375, |
| "LegalizeCCOpLayout": 0.004191398620605469, |
| "LegalizeOpLevelAlias": 0.0015521049499511719, |
| "LegalizePartitionReduce": 0.002257108688354492, |
| "LegalizeSundaAccess": 0.03900027275085449, |
| "LegalizeSundaMacro": 0.010483741760253906, |
| "LegalizeType": 0.0038602352142333984, |
| "LocalLayoutOpt": 0.01764845848083496, |
| "LoopFusion": 0.006066322326660156, |
| "LoopSplitting": 0.0015685558319091797, |
| "LowerBroadcast": 0.0020384788513183594, |
| "LowerCCOpBlockAxis": 0.005359172821044922, |
| "LowerComplexBroadcast": 0.0019440650939941406, |
| "LowerIntrinsics": 0.030491113662719727, |
| "LowerTensorOp": 0.012917041778564453, |
| "LowerTranspose": 0.010635852813720703, |
| "MacroGeneration": 0.06435012817382813, |
| "MaskPropagation": 0.0051097869873046875, |
| "MemcpyElimination": 0.11022067070007324, |
| "MutateDataType": 0.0014224052429199219, |
| "NeuronAliasDependencyInduction": 0.00023031234741210938, |
| "NeuronAliasDependencyReset": 0.021604061126708984, |
| "NeuronInstComb": 0.013072729110717773, |
| "NeuronLICM": 0.01006174087524414, |
| "NeuronLoopFusion": 0.017573833465576172, |
| "NeuronLoopInterchange": 0.0020608901977539063, |
| "NeuronSimplifier": 0.010074615478515625, |
| "NeuronSimplifyPredicates": 0.0060672760009765625, |
| "NeuronValueNumbering": 0.0041046142578125, |
| "OptimizeAliasedCopyChain": 0.0014190673828125, |
| "OptimizeNKIKernels": 0.0021109580993652344, |
| "PAGLayoutOpt": 0.3779466152191162, |
| "PComputeCutting": 0.008729696273803711, |
| "PGLayoutTilingPipeline": 1.5334703922271729, |
| "PGTiling": 0.47260475158691406, |
| "PadElimination": 0.0015625953674316406, |
| "ParAxesAnnotation": 0.2937772274017334, |
| "PartialLoopFusion": 0.016366004943847656, |
| "PartialSimdFusion": 0.01980447769165039, |
| "PerfectLoopNest": 0.0021877288818359375, |
| "RecognizeOpIdiom": 0.004831075668334961, |
| "Recompute": 0.00025010108947753906, |
| "RelaxPredicates": 0.0039484500885009766, |
| "Rematerialization": 0.004274129867553711, |
| "ReshapeWeights": 0.000804901123046875, |
| "ResolveAccessConflict": 0.0038733482360839844, |
| "ResolveComplicatePredicates": 0.0016858577728271484, |
| "RewriteReplicationMatmul": 0.0014014244079589844, |
| "RewriteWeights": 0.00405120849609375, |
| "SFKVectorizer": 0.20196890830993652, |
| "SimpleAllReduceTiling": 0.002203702926635742, |
| "Simplifier": 0.004297018051147461, |
| "SimplifyMacroPredicates": 0.01361393928527832, |
| "SimplifyNeuronTensor": 0.009984970092773438, |
| "SimplifySlice": 0.0010356903076171875, |
| "SimplifyTensor": 0.006205558776855469, |
| "SpillPSum": 0.016466140747070313, |
| "SplitAPUnionSets": 0.029446840286254883, |
| "SplitAccGrp": 0.0020453929901123047, |
| "StaticProfiler": 0.004591464996337891, |
| "StaticTransposeLocalTensor": 0.005173683166503906, |
| "SundaISel": 0.04554462432861328, |
| "TCTransform": 0.002426624298095703, |
| "TensorInitialization": 0.009510517120361328, |
| "TensorOpSimplifier": 0.0067560672760009766, |
| "TensorOpTransform": 0.028885841369628906, |
| "TileCCOps": 0.005466938018798828, |
| "TilingProfiler": 0.013426065444946289, |
| "TransformConvOp": 0.002458810806274414, |
| "TritiumFusion": 0.0620732307434082, |
| "ValueNumbering": 0.002520322799682617, |
| "VectorizeDMA": 0.005783796310424805, |
| "VectorizeMatMult": 0.005175352096557617, |
| "WeightCoalescing": 0.0029850006103515625, |
| "ZeroSizeTensorElimination": 0.00011801719665527344 |
| }, |
| "tensorizer": { |
| "DMATilingProfiler::TotalInstructionsAfterTiling": 2597.0, |
| "StaticProfiler::AifUb": 40.028141021728516, |
| "StaticProfiler::ArithmeticIntensityTensorizer": 420.0349426269531, |
| "StaticProfiler::AverageDmaLength": 1921.007568359375, |
| "StaticProfiler::AverageFractalPeUtilization": 99.95317840576172, |
| "StaticProfiler::AveragePartitionUtilization": 99.87249755859375, |
| "StaticProfiler::AveragePeUtilization": 99.80845642089844, |
| "StaticProfiler::DDRTransferBytes": 64558336.0, |
| "StaticProfiler::InternalTransferBytes": 52297728.0, |
| "StaticProfiler::LoadExpanded": 23298.0, |
| "StaticProfiler::LocalizationEfficiency": 1049.3489990234375, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1358.191162109375, |
| "StaticProfiler::StoreExpanded": 5505.0, |
| "StaticProfiler::TotalDMAExpanded": 28803.0, |
| "StaticProfiler::TotalDynamicInstancesCount": 3692.0, |
| "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 3689.0, |
| "StaticProfiler::TotalLNCComm": 0.0, |
| "StaticProfiler::TotalLNCCommTransfer": 0.0, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0, |
| "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0, |
| "TilingProfiler::DmaInstructionsAfterTiling": 0.0, |
| "TilingProfiler::GenericInstructionsAfterTiling": 48.0, |
| "TilingProfiler::MatMultInstructionsAfterTiling": 1412.0, |
| "TilingProfiler::NumPfTransposes": 7.0, |
| "TilingProfiler::NumPfTransposesForIo": 1.0, |
| "TilingProfiler::NumPfTransposesForLocal": 5.0, |
| "TilingProfiler::NumPfTransposesForNonlocal": 1.0, |
| "TilingProfiler::PfTransposeInstructions": 608.0, |
| "TilingProfiler::PfTransposeInstructionsForIo": 128.0, |
| "TilingProfiler::PfTransposeInstructionsForLocal": 416.0, |
| "TilingProfiler::PfTransposeInstructionsForNonlocal": 64.0, |
| "TilingProfiler::ReduceInstructionsAfterTiling": 0.0, |
| "TilingProfiler::SimdInstructionsAfterTiling": 257.0, |
| "TilingProfiler::TotalInstructionsAfterTiling": 0.0, |
| "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0, |
| "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0, |
| "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0, |
| "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0, |
| "TransformConvOp::conv2d_column_packing": 0.0, |
| "TransformConvOp::conv2d_column_packing_1": 0.0, |
| "TransformConvOp::conv2d_column_packing_io10": 0.0, |
| "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0 |
| } |
| }, |
| "sg0001": { |
| "compiletime": { |
| "AGOrderingAnalysisPass": 0.03313565254211426, |
| "AffinePredicateResolution": 0.0015239715576171875, |
| "AliasDependencyElimination": 0.00011467933654785156, |
| "AliasDependencyInduction": 0.009088993072509766, |
| "AliasDependencyReset": 1.062025547027588, |
| "BFComputeCutting": 0.0024559497833251953, |
| "BirCodeGenLoop": 0.03748297691345215, |
| "CCOpFusion": 0.04092240333557129, |
| "CanonicalizeDAGForPGTiling": 0.004329681396484375, |
| "CanonicalizeIR": 0.002464771270751953, |
| "CoalesceCCOp": 0.004778146743774414, |
| "CommuteConcat": 0.0011680126190185547, |
| "DMALocalityOpt": 0.0016834735870361328, |
| "DMAProfiler": 0.0039997100830078125, |
| "DMATilingProfiler": 0.004555702209472656, |
| "DataLocalityOpt": 0.13762187957763672, |
| "DataStreaming": 0.0044286251068115234, |
| "DeConcat": 0.0015981197357177734, |
| "DeadCodeElimination": 0.0020780563354492188, |
| "DeadStoreElimination": 0.03435230255126953, |
| "DelinearIndices": 0.00969839096069336, |
| "Delinearization": 0.0038826465606689453, |
| "DoNothing": 9.846687316894531e-05, |
| "DramToDramTranspose": 0.03438973426818848, |
| "DumpGraphAndMetadata": 0.00426793098449707, |
| "EliminateDivs": 0.004217386245727539, |
| "ExpandBatchNorm": 0.0019202232360839844, |
| "ExpandISAMacro": 0.0024042129516601563, |
| "FactorizeBlkDims": 0.01425933837890625, |
| "FactorizeThreadAxesInFreeDims": 0.0026972293853759766, |
| "FlattenMacroLoop": 0.002768993377685547, |
| "GenericAccessSimplifier": 0.001058816909790039, |
| "InferInitValue": 0.03559255599975586, |
| "InferIntrinsicOnCC": 0.009636163711547852, |
| "InferNeuronTensor": 0.04922318458557129, |
| "InferNonlocalTensors": 0.030732393264770508, |
| "InferPSumTensor": 0.03249359130859375, |
| "InlineNativeKernels": 0.0014734268188476563, |
| "InsertIOTransposes": 0.021765470504760742, |
| "InsertLocalTransposes": 0.006593465805053711, |
| "InsertOffloadedTransposes": 0.0034906864166259766, |
| "LICM": 0.003262758255004883, |
| "LateLegalizeInst": 0.00400543212890625, |
| "LateLegalizePostSplit": 0.00289154052734375, |
| "LateLowerReshapeOp": 0.002287149429321289, |
| "LateLowerTensorOp": 0.0046651363372802734, |
| "LateNeuronInstComb": 0.019269704818725586, |
| "LayoutPreprocessing": 0.03711414337158203, |
| "LayoutPreprocessingAndAnalysis": 0.2516040802001953, |
| "LayoutRequirementAnalysis": 0.007753133773803711, |
| "LegalizeCCOpLayout": 0.003732919692993164, |
| "LegalizeOpLevelAlias": 0.0016019344329833984, |
| "LegalizePartitionReduce": 0.0020945072174072266, |
| "LegalizeSundaAccess": 0.016069650650024414, |
| "LegalizeSundaMacro": 0.010806083679199219, |
| "LegalizeType": 0.004706859588623047, |
| "LocalLayoutOpt": 0.02442765235900879, |
| "LoopFusion": 0.0067822933197021484, |
| "LoopSplitting": 0.00033974647521972656, |
| "LowerBroadcast": 0.0019419193267822266, |
| "LowerCCOpBlockAxis": 0.005570650100708008, |
| "LowerComplexBroadcast": 0.0020999908447265625, |
| "LowerIntrinsics": 0.03607368469238281, |
| "LowerTensorOp": 0.011876583099365234, |
| "LowerTranspose": 0.011530637741088867, |
| "MacroGeneration": 0.10653066635131836, |
| "MaskPropagation": 0.003092050552368164, |
| "MemcpyElimination": 0.10495471954345703, |
| "MutateDataType": 0.0014193058013916016, |
| "NeuronAliasDependencyInduction": 0.0002295970916748047, |
| "NeuronAliasDependencyReset": 0.021070480346679688, |
| "NeuronInstComb": 0.012903451919555664, |
| "NeuronLICM": 0.00844264030456543, |
| "NeuronLoopFusion": 0.020880460739135742, |
| "NeuronLoopInterchange": 0.0021686553955078125, |
| "NeuronSimplifier": 0.011090755462646484, |
| "NeuronSimplifyPredicates": 0.0016274452209472656, |
| "NeuronValueNumbering": 0.004062652587890625, |
| "OptimizeAliasedCopyChain": 0.0014641284942626953, |
| "OptimizeNKIKernels": 0.0023856163024902344, |
| "PAGLayoutOpt": 0.17638587951660156, |
| "PComputeCutting": 0.00709986686706543, |
| "PGLayoutTilingPipeline": 1.142796516418457, |
| "PGTiling": 0.39766955375671387, |
| "PadElimination": 0.0015380382537841797, |
| "ParAxesAnnotation": 0.09186458587646484, |
| "PartialLoopFusion": 0.015995025634765625, |
| "PartialSimdFusion": 0.026766300201416016, |
| "PerfectLoopNest": 0.002192258834838867, |
| "RecognizeOpIdiom": 0.004943370819091797, |
| "Recompute": 0.00025773048400878906, |
| "RelaxPredicates": 0.003591299057006836, |
| "Rematerialization": 0.0025196075439453125, |
| "ReshapeWeights": 0.0007069110870361328, |
| "ResolveAccessConflict": 0.00481104850769043, |
| "ResolveComplicatePredicates": 0.002285003662109375, |
| "RewriteReplicationMatmul": 0.0021715164184570313, |
| "RewriteWeights": 0.003401041030883789, |
| "SFKVectorizer": 0.14661574363708496, |
| "SimpleAllReduceTiling": 0.0016207695007324219, |
| "Simplifier": 0.00443577766418457, |
| "SimplifyMacroPredicates": 0.006165742874145508, |
| "SimplifyNeuronTensor": 0.006829500198364258, |
| "SimplifySlice": 0.0013000965118408203, |
| "SimplifyTensor": 0.0061337947845458984, |
| "SpillPSum": 0.018761634826660156, |
| "SplitAPUnionSets": 0.017923593521118164, |
| "SplitAccGrp": 0.002531290054321289, |
| "StaticProfiler": 0.003990888595581055, |
| "StaticTransposeLocalTensor": 0.004915952682495117, |
| "SundaISel": 0.04209589958190918, |
| "TCTransform": 0.0012347698211669922, |
| "TensorInitialization": 0.002599954605102539, |
| "TensorOpSimplifier": 0.006845712661743164, |
| "TensorOpTransform": 0.03345227241516113, |
| "TileCCOps": 0.005617856979370117, |
| "TilingProfiler": 0.015013933181762695, |
| "TransformConvOp": 0.002393960952758789, |
| "TritiumFusion": 0.09340715408325195, |
| "ValueNumbering": 0.0031540393829345703, |
| "VectorizeDMA": 0.0015842914581298828, |
| "VectorizeMatMult": 0.0071103572845458984, |
| "WeightCoalescing": 0.0026235580444335938, |
| "ZeroSizeTensorElimination": 0.0001163482666015625 |
| }, |
| "tensorizer": { |
| "DMATilingProfiler::TotalInstructionsAfterTiling": 7847.0, |
| "StaticProfiler::AifUb": 490.6532287597656, |
| "StaticProfiler::ArithmeticIntensityTensorizer": 487.63507080078125, |
| "StaticProfiler::AverageDmaLength": 869.1515502929688, |
| "StaticProfiler::AverageFractalPeUtilization": 100.0, |
| "StaticProfiler::AveragePartitionUtilization": 99.83790588378906, |
| "StaticProfiler::AveragePeUtilization": 100.0, |
| "StaticProfiler::DDRTransferBytes": 215827456.0, |
| "StaticProfiler::InternalTransferBytes": 43515904.0, |
| "StaticProfiler::LoadExpanded": 238976.0, |
| "StaticProfiler::LocalizationEfficiency": 99.38487243652344, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 107.76165771484375, |
| "StaticProfiler::StoreExpanded": 5121.0, |
| "StaticProfiler::TotalDMAExpanded": 244097.0, |
| "StaticProfiler::TotalDynamicInstancesCount": 9872.0, |
| "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 9872.0, |
| "StaticProfiler::TotalLNCComm": 0.0, |
| "StaticProfiler::TotalLNCCommTransfer": 0.0, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0, |
| "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0, |
| "TilingProfiler::DmaInstructionsAfterTiling": 0.0, |
| "TilingProfiler::GenericInstructionsAfterTiling": 32.0, |
| "TilingProfiler::MatMultInstructionsAfterTiling": 6016.0, |
| "TilingProfiler::NumPfTransposes": 8.0, |
| "TilingProfiler::NumPfTransposesForIo": 3.0, |
| "TilingProfiler::NumPfTransposesForLocal": 3.0, |
| "TilingProfiler::NumPfTransposesForNonlocal": 2.0, |
| "TilingProfiler::PfTransposeInstructions": 680.0, |
| "TilingProfiler::PfTransposeInstructionsForIo": 136.0, |
| "TilingProfiler::PfTransposeInstructionsForLocal": 288.0, |
| "TilingProfiler::PfTransposeInstructionsForNonlocal": 256.0, |
| "TilingProfiler::ReduceInstructionsAfterTiling": 0.0, |
| "TilingProfiler::SimdInstructionsAfterTiling": 288.0, |
| "TilingProfiler::TotalInstructionsAfterTiling": 0.0, |
| "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0, |
| "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0, |
| "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0, |
| "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0, |
| "TransformConvOp::conv2d_column_packing": 0.0, |
| "TransformConvOp::conv2d_column_packing_1": 0.0, |
| "TransformConvOp::conv2d_column_packing_io10": 0.0, |
| "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0 |
| } |
| }, |
| "sg0002": { |
| "compiletime": { |
| "AGOrderingAnalysisPass": 0.018257856369018555, |
| "AffinePredicateResolution": 0.0011677742004394531, |
| "AliasDependencyElimination": 0.0001201629638671875, |
| "AliasDependencyInduction": 0.0052988529205322266, |
| "AliasDependencyReset": 0.029210567474365234, |
| "BFComputeCutting": 0.0032625198364257813, |
| "BirCodeGenLoop": 0.4527714252471924, |
| "CCOpFusion": 0.02410125732421875, |
| "CanonicalizeDAGForPGTiling": 0.004324913024902344, |
| "CanonicalizeIR": 0.0019502639770507813, |
| "CoalesceCCOp": 0.014463186264038086, |
| "CommuteConcat": 0.0008339881896972656, |
| "DMALocalityOpt": 0.005598783493041992, |
| "DMAProfiler": 0.01209115982055664, |
| "DMATilingProfiler": 0.004332065582275391, |
| "DataLocalityOpt": 0.07260942459106445, |
| "DataStreaming": 0.03940248489379883, |
| "DeConcat": 0.0005326271057128906, |
| "DeadCodeElimination": 0.0009255409240722656, |
| "DeadStoreElimination": 0.0055675506591796875, |
| "DelinearIndices": 0.004735231399536133, |
| "Delinearization": 0.0030374526977539063, |
| "DoNothing": 7.033348083496094e-05, |
| "DramToDramTranspose": 0.018135547637939453, |
| "DumpGraphAndMetadata": 0.09476375579833984, |
| "EliminateDivs": 0.002595663070678711, |
| "ExpandBatchNorm": 0.002063274383544922, |
| "ExpandISAMacro": 0.011472225189208984, |
| "FactorizeBlkDims": 0.008858203887939453, |
| "FactorizeThreadAxesInFreeDims": 0.0010046958923339844, |
| "FlattenMacroLoop": 0.002232074737548828, |
| "GenericAccessSimplifier": 0.0018167495727539063, |
| "InferInitValue": 0.024865150451660156, |
| "InferIntrinsicOnCC": 0.009101152420043945, |
| "InferNeuronTensor": 0.023293495178222656, |
| "InferNonlocalTensors": 0.01632833480834961, |
| "InferPSumTensor": 0.27681708335876465, |
| "InlineNativeKernels": 0.0081634521484375, |
| "InsertIOTransposes": 0.019203901290893555, |
| "InsertLocalTransposes": 0.0042340755462646484, |
| "InsertOffloadedTransposes": 0.002811431884765625, |
| "LICM": 0.0029730796813964844, |
| "LateLegalizeInst": 0.01390385627746582, |
| "LateLegalizePostSplit": 0.012536048889160156, |
| "LateLowerReshapeOp": 0.0018641948699951172, |
| "LateLowerTensorOp": 0.0014081001281738281, |
| "LateNeuronInstComb": 0.008648872375488281, |
| "LayoutPreprocessing": 0.02658390998840332, |
| "LayoutPreprocessingAndAnalysis": 0.10707235336303711, |
| "LayoutRequirementAnalysis": 0.005135536193847656, |
| "LegalizeCCOpLayout": 0.002307415008544922, |
| "LegalizeOpLevelAlias": 0.0012297630310058594, |
| "LegalizePartitionReduce": 0.0010194778442382813, |
| "LegalizeSundaAccess": 0.07593941688537598, |
| "LegalizeSundaMacro": 0.010968446731567383, |
| "LegalizeType": 0.011834383010864258, |
| "LocalLayoutOpt": 0.013799905776977539, |
| "LoopFusion": 0.0052182674407958984, |
| "LoopSplitting": 0.0003161430358886719, |
| "LowerBroadcast": 0.0013611316680908203, |
| "LowerCCOpBlockAxis": 0.0040547847747802734, |
| "LowerComplexBroadcast": 0.002165079116821289, |
| "LowerIntrinsics": 0.31132984161376953, |
| "LowerTensorOp": 0.010558843612670898, |
| "LowerTranspose": 0.012272357940673828, |
| "MacroGeneration": 0.029862642288208008, |
| "MaskPropagation": 0.002757549285888672, |
| "MemcpyElimination": 0.025969266891479492, |
| "MutateDataType": 0.002087831497192383, |
| "NeuronAliasDependencyInduction": 0.00016880035400390625, |
| "NeuronAliasDependencyReset": 0.020352602005004883, |
| "NeuronInstComb": 0.004126310348510742, |
| "NeuronLICM": 0.0351865291595459, |
| "NeuronLoopFusion": 0.007991313934326172, |
| "NeuronLoopInterchange": 0.002409219741821289, |
| "NeuronSimplifier": 0.007069587707519531, |
| "NeuronSimplifyPredicates": 0.12138772010803223, |
| "NeuronValueNumbering": 0.0028395652770996094, |
| "OptimizeAliasedCopyChain": 0.0005936622619628906, |
| "OptimizeNKIKernels": 0.5374257564544678, |
| "PAGLayoutOpt": 0.08115577697753906, |
| "PComputeCutting": 0.004801273345947266, |
| "PGLayoutTilingPipeline": 0.5454635620117188, |
| "PGTiling": 0.14933419227600098, |
| "PadElimination": 0.00034046173095703125, |
| "ParAxesAnnotation": 0.053552865982055664, |
| "PartialLoopFusion": 0.0067539215087890625, |
| "PartialSimdFusion": 0.00693058967590332, |
| "PerfectLoopNest": 0.0035321712493896484, |
| "RecognizeOpIdiom": 0.003947257995605469, |
| "Recompute": 0.00024962425231933594, |
| "RelaxPredicates": 0.013285398483276367, |
| "Rematerialization": 0.002062082290649414, |
| "ReshapeWeights": 0.002131223678588867, |
| "ResolveAccessConflict": 0.0038597583770751953, |
| "ResolveComplicatePredicates": 0.002032756805419922, |
| "RewriteReplicationMatmul": 0.001924753189086914, |
| "RewriteWeights": 0.002452373504638672, |
| "SFKVectorizer": 0.2690722942352295, |
| "SimpleAllReduceTiling": 0.008755922317504883, |
| "Simplifier": 0.004038810729980469, |
| "SimplifyMacroPredicates": 0.010622739791870117, |
| "SimplifyNeuronTensor": 1.059011697769165, |
| "SimplifySlice": 0.0009577274322509766, |
| "SimplifyTensor": 0.005341768264770508, |
| "SpillPSum": 0.011537313461303711, |
| "SplitAPUnionSets": 0.10771751403808594, |
| "SplitAccGrp": 0.002201557159423828, |
| "StaticProfiler": 0.012447118759155273, |
| "StaticTransposeLocalTensor": 0.0038712024688720703, |
| "SundaISel": 0.04214668273925781, |
| "TCTransform": 0.0008432865142822266, |
| "TensorInitialization": 0.012825727462768555, |
| "TensorOpSimplifier": 0.004651308059692383, |
| "TensorOpTransform": 0.019537687301635742, |
| "TileCCOps": 0.006766319274902344, |
| "TilingProfiler": 0.006911277770996094, |
| "TransformConvOp": 0.0030303001403808594, |
| "TritiumFusion": 0.04502224922180176, |
| "ValueNumbering": 0.001996755599975586, |
| "VectorizeDMA": 0.0019402503967285156, |
| "VectorizeMatMult": 0.0027413368225097656, |
| "WeightCoalescing": 0.00829005241394043, |
| "ZeroSizeTensorElimination": 0.00013709068298339844 |
| }, |
| "tensorizer": { |
| "DMATilingProfiler::TotalInstructionsAfterTiling": 44382.0, |
| "StaticProfiler::AifUb": 205.154296875, |
| "StaticProfiler::ArithmeticIntensityTensorizer": 201.6046905517578, |
| "StaticProfiler::AverageDmaLength": 1901.806396484375, |
| "StaticProfiler::AverageFractalPeUtilization": 99.66542053222656, |
| "StaticProfiler::AveragePartitionUtilization": 97.7269515991211, |
| "StaticProfiler::AveragePeUtilization": 98.64861297607422, |
| "StaticProfiler::DDRTransferBytes": 795531072.0, |
| "StaticProfiler::InternalTransferBytes": 646388224.0, |
| "StaticProfiler::LoadExpanded": 376342.0, |
| "StaticProfiler::LocalizationEfficiency": 98.26979064941406, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 101.01405334472656, |
| "StaticProfiler::StoreExpanded": 4189.0, |
| "StaticProfiler::TotalDMAExpanded": 380531.0, |
| "StaticProfiler::TotalDynamicInstancesCount": 53882.0, |
| "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 53436.0, |
| "StaticProfiler::TotalLNCComm": 0.0, |
| "StaticProfiler::TotalLNCCommTransfer": 0.0, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0, |
| "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0, |
| "TilingProfiler::DmaInstructionsAfterTiling": 0.0, |
| "TilingProfiler::GenericInstructionsAfterTiling": 4.0, |
| "TilingProfiler::MatMultInstructionsAfterTiling": 23616.0, |
| "TilingProfiler::NumPfTransposes": 5.0, |
| "TilingProfiler::NumPfTransposesForIo": 1.0, |
| "TilingProfiler::NumPfTransposesForLocal": 1.0, |
| "TilingProfiler::NumPfTransposesForNonlocal": 3.0, |
| "TilingProfiler::PfTransposeInstructions": 19393.0, |
| "TilingProfiler::PfTransposeInstructionsForIo": 19008.0, |
| "TilingProfiler::PfTransposeInstructionsForLocal": 1.0, |
| "TilingProfiler::PfTransposeInstructionsForNonlocal": 384.0, |
| "TilingProfiler::ReduceInstructionsAfterTiling": 4.0, |
| "TilingProfiler::SimdInstructionsAfterTiling": 158.0, |
| "TilingProfiler::TotalInstructionsAfterTiling": 0.0, |
| "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0, |
| "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0, |
| "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0, |
| "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0, |
| "TransformConvOp::conv2d_column_packing": 0.0, |
| "TransformConvOp::conv2d_column_packing_1": 0.0, |
| "TransformConvOp::conv2d_column_packing_io10": 0.0, |
| "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0 |
| } |
| }, |
| "sg01": { |
| "compiletime": { |
| "CanonicalizeConv": 1.2000000424450263e-05, |
| "CanonicalizeForTensorizer": 1.2999999853491317e-05, |
| "Canonicalizer": 0.0002500000118743628, |
| "HoistCompute": 3.000000106112566e-06, |
| "IdentifyCrossPassTensors": 2.300000051036477e-05, |
| "MemcastMotion": 1.1000000085914508e-05, |
| "PenguinizeFunctions": 1.4000000192027073e-05, |
| "PruneFunctions": 3.099999958067201e-05, |
| "RemoveOptimizationBarriers": 2.2000000171829015e-05, |
| "ScatterMotion": 2.9999999242136255e-05, |
| "TensorizerLegalizationPass": 1.700000029813964e-05, |
| "VerifySupportedOps": 9.000000318337698e-06, |
| "algsimp": 6.299999949987978e-05, |
| "batchnorm_expander": 1.2999999853491317e-05, |
| "boundary-marker-removal": 4.999999873689376e-06, |
| "call-inliner": 9.000000318337698e-06, |
| "canonicalize-boundary-marker": 6.000000212225132e-06, |
| "collective-stream-id-checker": 3.000000106112566e-06, |
| "comparison-expander": 4.999999873689376e-06, |
| "computation-deduplicator": 1.8000000636675395e-05, |
| "conditional-to-select": 4.999999873689376e-06, |
| "config-lowering": 2.5999999706982635e-05, |
| "constant_folding": 7.999999979801942e-06, |
| "cse": 1.2000000424450263e-05, |
| "dce": 9.999999974752427e-07, |
| "dynamic-slice-transpose": 3.999999989900971e-06, |
| "eliminate-redundant-compare": 3.999999989900971e-06, |
| "emit-offloaded-dropout": 1.2999999853491317e-05, |
| "flatten-call-graph": 7.999999979801942e-06, |
| "fuse-send-recv": 2.099999983329326e-05, |
| "hilo::LegalizeAlias": 4.999999873689376e-06, |
| "hilo::NeuronInstCombine": 4.5000000682193786e-05, |
| "hilo::NeuronOpFusion": 1.700000029813964e-05, |
| "hilo::ReplaceTokenTypeWithU8Pass": 2.099999983329326e-05, |
| "hilo::ScheduleFusion": 9.999999974752427e-07, |
| "hilo::SixtyFourHack": 1.2999999853491317e-05, |
| "hilo::VerifyAliasing": 1.9999999949504854e-06, |
| "hlo-mac-count": 2.9999999242136255e-05, |
| "hlo-verifier": 0.00018000000272877514, |
| "legalize-ccops": 9.999999974752427e-07, |
| "legalize-compare": 3.999999989900971e-06, |
| "lower-argminmax-custom-call": 3.999999989900971e-06, |
| "map-inline": 1.2000000424450263e-05, |
| "metadata-naming": 1.8000000636675395e-05, |
| "mlir::detail::OpToOpPassAdaptor": 9.999999747378752e-05, |
| "mlir::hlo::MhloToPyPenguin": 0.0009420000133104622, |
| "mlir::mhlo::LowerComplexExtraPass": 7.999999797903001e-05, |
| "mlir::mhlo::LowerComplexPass": 0.00015799999528098851, |
| "native-to-custom-softmax": 6.000000212225132e-06, |
| "native-to-custom-softmax-dx": 1.2999999853491317e-05, |
| "operand_upcaster": 1.4999999621068127e-05, |
| "post-par-pipe-begin": 1.9999999949504854e-06, |
| "post-par-pipe-end": 0.0, |
| "post-partition-simplification": 0.0005130000063218176, |
| "replace-minimum-constant": 6.000000212225132e-06, |
| "reshape-mover": 3.000000106112566e-06, |
| "simplify-concat": 4.8999998398358e-05, |
| "simplify-while-loops": 1.9999999949504854e-06, |
| "transform-variadic-reduce": 9.000000318337698e-06, |
| "tuple-simplifier": 4.999999873689376e-06, |
| "unpack-nested-aws-ntwsr": 3.999999989900971e-06, |
| "unroll-while-loop": 0.0 |
| }, |
| "hilo": { |
| "ArithmeticIntensity": 457.20416259765625, |
| "HloMacCount": 50465865728.0, |
| "Traffic": 220758560.0 |
| } |
| }, |
| "sg02": { |
| "compiletime": { |
| "CanonicalizeConv": 0.0002589999930933118, |
| "CanonicalizeForTensorizer": 1.2000000424450263e-05, |
| "Canonicalizer": 0.0003060000017285347, |
| "HoistCompute": 1.9999999949504854e-06, |
| "IdentifyCrossPassTensors": 2.4000000848900527e-05, |
| "MemcastMotion": 1.2999999853491317e-05, |
| "PenguinizeFunctions": 9.000000318337698e-06, |
| "PruneFunctions": 7.999999979801942e-06, |
| "RemoveOptimizationBarriers": 2.099999983329326e-05, |
| "ScatterMotion": 1.9999999949504854e-06, |
| "TensorizerLegalizationPass": 4.999999873689376e-06, |
| "VerifySupportedOps": 1.1000000085914508e-05, |
| "algsimp": 5.900000178371556e-05, |
| "batchnorm_expander": 1.2000000424450263e-05, |
| "boundary-marker-removal": 3.999999989900971e-06, |
| "call-inliner": 1.1000000085914508e-05, |
| "canonicalize-boundary-marker": 4.999999873689376e-06, |
| "collective-stream-id-checker": 3.000000106112566e-06, |
| "comparison-expander": 4.999999873689376e-06, |
| "computation-deduplicator": 4.099999932805076e-05, |
| "conditional-to-select": 6.000000212225132e-06, |
| "config-lowering": 2.5999999706982635e-05, |
| "constant_folding": 9.000000318337698e-06, |
| "cse": 1.1000000085914508e-05, |
| "dce": 9.999999974752427e-07, |
| "dynamic-slice-transpose": 3.999999989900971e-06, |
| "eliminate-redundant-compare": 3.999999989900971e-06, |
| "emit-offloaded-dropout": 1.2999999853491317e-05, |
| "flatten-call-graph": 1.1000000085914508e-05, |
| "fuse-send-recv": 1.8000000636675395e-05, |
| "hilo::LegalizeAlias": 1.9999999949504854e-06, |
| "hilo::NeuronInstCombine": 5.8000001445179805e-05, |
| "hilo::NeuronOpFusion": 1.9999999949504854e-06, |
| "hilo::ReplaceTokenTypeWithU8Pass": 9.000000318337698e-06, |
| "hilo::ScheduleFusion": 3.999999989900971e-06, |
| "hilo::SixtyFourHack": 3.899999865097925e-05, |
| "hilo::VerifyAliasing": 9.999999974752427e-07, |
| "hlo-mac-count": 0.00016799999866634607, |
| "hlo-verifier": 0.00015900000289548188, |
| "legalize-ccops": 9.999999974752427e-07, |
| "legalize-compare": 3.000000106112566e-06, |
| "lower-argminmax-custom-call": 3.000000106112566e-06, |
| "map-inline": 1.2000000424450263e-05, |
| "metadata-naming": 1.700000029813964e-05, |
| "mlir::detail::OpToOpPassAdaptor": 2.2000000171829015e-05, |
| "mlir::hlo::MhloToPyPenguin": 0.0008549999911338091, |
| "mlir::mhlo::LowerComplexExtraPass": 8.800000068731606e-05, |
| "mlir::mhlo::LowerComplexPass": 0.00015999999595806003, |
| "native-to-custom-softmax": 4.999999873689376e-06, |
| "native-to-custom-softmax-dx": 1.9999999494757503e-05, |
| "operand_upcaster": 1.2000000424450263e-05, |
| "post-par-pipe-begin": 1.9999999949504854e-06, |
| "post-par-pipe-end": 0.0, |
| "post-partition-simplification": 0.0005009999731555581, |
| "replace-minimum-constant": 9.000000318337698e-06, |
| "reshape-mover": 3.000000106112566e-06, |
| "simplify-concat": 4.3000000005122274e-05, |
| "simplify-while-loops": 1.9999999949504854e-06, |
| "transform-variadic-reduce": 4.8000001697801054e-05, |
| "tuple-simplifier": 4.999999873689376e-06, |
| "unpack-nested-aws-ntwsr": 3.999999989900971e-06, |
| "unroll-while-loop": 0.0 |
| }, |
| "hilo": { |
| "ArithmeticIntensity": 99.1578140258789, |
| "HloMacCount": 38965870592.0, |
| "Traffic": 785936448.0 |
| } |
| } |
| } |