| { |
| "Average": { |
| "tensorizer": { |
| "StaticProfiler::AverageFractalPeUtilization": 99.7004623413086, |
| "StaticProfiler::AveragePartitionUtilization": 97.94140625, |
| "StaticProfiler::AveragePeUtilization": 98.78884887695313, |
| "StaticProfiler::LocalizationEfficiency": 91.59693145751953, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 95.863037109375, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0 |
| } |
| }, |
| "Count": { |
| "tensorizer": { |
| "StaticProfiler::AverageFractalPeUtilization": 1.0, |
| "StaticProfiler::AveragePartitionUtilization": 1.0, |
| "StaticProfiler::AveragePeUtilization": 1.0, |
| "StaticProfiler::LocalizationEfficiency": 1.0, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1.0, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 1.0, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 1.0 |
| } |
| }, |
| "Sum": { |
| "compiletime": { |
| "AGOrderingAnalysisPass": 0.01837611198425293, |
| "AffinePredicateResolution": 0.0011184215545654297, |
| "AliasDependencyElimination": 0.00015664100646972656, |
| "AliasDependencyInduction": 0.005170583724975586, |
| "AliasDependencyReset": 0.027508020401000977, |
| "BFComputeCutting": 0.0036101341247558594, |
| "BirCodeGenLoop": 0.4774467945098877, |
| "CCOpFusion": 0.033265113830566406, |
| "CanonicalizeConv": 2.300000051036477e-05, |
| "CanonicalizeDAGForPGTiling": 0.004282712936401367, |
| "CanonicalizeForTensorizer": 4.600000102072954e-05, |
| "CanonicalizeIR": 0.0024569034576416016, |
| "Canonicalizer": 0.0009039999567903578, |
| "CoalesceCCOp": 0.014229059219360352, |
| "CommuteConcat": 0.0017316341400146484, |
| "DMALocalityOpt": 0.005630016326904297, |
| "DMAProfiler": 0.012981653213500977, |
| "DMATilingProfiler": 0.0037560462951660156, |
| "DataLocalityOpt": 0.07645320892333984, |
| "DataStreaming": 0.03730320930480957, |
| "DeConcat": 0.0018520355224609375, |
| "DeadCodeElimination": 0.0020148754119873047, |
| "DeadStoreElimination": 0.006912708282470703, |
| "DelinearIndices": 0.004647254943847656, |
| "Delinearization": 0.003908872604370117, |
| "DoNothing": 0.0001888275146484375, |
| "DramToDramTranspose": 0.02015542984008789, |
| "DumpGraphAndMetadata": 0.08691883087158203, |
| "EliminateDivs": 0.0025060176849365234, |
| "ExpandBatchNorm": 0.0027189254760742188, |
| "ExpandISAMacro": 0.011646032333374023, |
| "FactorizeBlkDims": 0.010123252868652344, |
| "FactorizeThreadAxesInFreeDims": 0.0023202896118164063, |
| "FlattenMacroLoop": 0.00232696533203125, |
| "GenericAccessSimplifier": 0.0008094310760498047, |
| "HoistCompute": 5.999999757477781e-06, |
| "IdentifyCrossPassTensors": 5.2999999752501026e-05, |
| "InferInitValue": 0.02833867073059082, |
| "InferIntrinsicOnCC": 0.008923768997192383, |
| "InferNeuronTensor": 0.025766372680664063, |
| "InferNonlocalTensors": 0.014599800109863281, |
| "InferPSumTensor": 0.28418898582458496, |
| "InlineNativeKernels": 0.00860905647277832, |
| "InsertIOTransposes": 0.01989889144897461, |
| "InsertLocalTransposes": 0.004229307174682617, |
| "InsertOffloadedTransposes": 0.0029871463775634766, |
| "LICM": 0.0030870437622070313, |
| "LateLegalizeInst": 0.014106035232543945, |
| "LateLegalizePostSplit": 0.014872312545776367, |
| "LateLowerReshapeOp": 0.0010464191436767578, |
| "LateLowerTensorOp": 0.002707242965698242, |
| "LateNeuronInstComb": 0.010563373565673828, |
| "LayoutPreprocessing": 0.026853561401367188, |
| "LayoutPreprocessingAndAnalysis": 0.0556035041809082, |
| "LayoutRequirementAnalysis": 0.004946470260620117, |
| "LegalizeCCOpLayout": 0.0025353431701660156, |
| "LegalizeOpLevelAlias": 0.0018966197967529297, |
| "LegalizePartitionReduce": 0.0017490386962890625, |
| "LegalizeSundaAccess": 0.07800722122192383, |
| "LegalizeSundaMacro": 0.012125253677368164, |
| "LegalizeType": 0.012685060501098633, |
| "LocalLayoutOpt": 0.013860225677490234, |
| "LoopFusion": 0.005201578140258789, |
| "LoopSplitting": 0.0003204345703125, |
| "LowerBroadcast": 0.002086162567138672, |
| "LowerCCOpBlockAxis": 0.0040171146392822266, |
| "LowerComplexBroadcast": 0.002280712127685547, |
| "LowerIntrinsics": 0.3143951892852783, |
| "LowerTensorOp": 0.01141357421875, |
| "LowerTranspose": 0.012923002243041992, |
| "MacroGeneration": 0.034410953521728516, |
| "MaskPropagation": 0.0028192996978759766, |
| "MemcastMotion": 1.8000000636675395e-05, |
| "MemcpyElimination": 0.02788853645324707, |
| "MutateDataType": 0.0012311935424804688, |
| "NeuronAliasDependencyInduction": 0.0001773834228515625, |
| "NeuronAliasDependencyReset": 0.024976015090942383, |
| "NeuronInstComb": 0.005156517028808594, |
| "NeuronLICM": 0.036696434020996094, |
| "NeuronLoopFusion": 0.008457422256469727, |
| "NeuronLoopInterchange": 0.001413106918334961, |
| "NeuronSimplifier": 0.007856369018554688, |
| "NeuronSimplifyPredicates": 0.12235808372497559, |
| "NeuronValueNumbering": 0.004765748977661133, |
| "OptimizeAliasedCopyChain": 0.0006341934204101563, |
| "OptimizeNKIKernels": 0.38834357261657715, |
| "PAGLayoutOpt": 0.0889735221862793, |
| "PComputeCutting": 0.005109071731567383, |
| "PGLayoutTilingPipeline": 0.6248171329498291, |
| "PGTiling": 0.1645822525024414, |
| "PadElimination": 0.0003485679626464844, |
| "ParAxesAnnotation": 0.05196070671081543, |
| "PartialLoopFusion": 0.011112451553344727, |
| "PartialSimdFusion": 0.012138128280639648, |
| "PenguinizeFunctions": 4.3000000005122274e-05, |
| "PerfectLoopNest": 0.002288341522216797, |
| "PruneFunctions": 4.099999932805076e-05, |
| "RecognizeOpIdiom": 0.0041277408599853516, |
| "Recompute": 0.00026416778564453125, |
| "RelaxPredicates": 0.01356959342956543, |
| "Rematerialization": 0.0024864673614501953, |
| "RemoveOptimizationBarriers": 4.900000203633681e-05, |
| "ReshapeWeights": 0.0007522106170654297, |
| "ResolveAccessConflict": 0.0048482418060302734, |
| "ResolveComplicatePredicates": 0.0015094280242919922, |
| "RewriteReplicationMatmul": 0.0015668869018554688, |
| "RewriteWeights": 0.0027174949645996094, |
| "SFKVectorizer": 0.2781519889831543, |
| "ScatterMotion": 4.70000013592653e-05, |
| "SimpleAllReduceTiling": 0.009549379348754883, |
| "Simplifier": 0.003630399703979492, |
| "SimplifyMacroPredicates": 0.011396646499633789, |
| "SimplifyNeuronTensor": 1.0561063289642334, |
| "SimplifySlice": 0.0023348331451416016, |
| "SimplifyTensor": 0.005601167678833008, |
| "SpillPSum": 0.013618230819702148, |
| "SplitAPUnionSets": 0.11336159706115723, |
| "SplitAccGrp": 0.001394510269165039, |
| "StaticProfiler": 0.014252662658691406, |
| "StaticTransposeLocalTensor": 0.003930330276489258, |
| "SundaISel": 0.04436635971069336, |
| "TCTransform": 0.0008757114410400391, |
| "TensorInitialization": 0.01558232307434082, |
| "TensorOpSimplifier": 0.004608869552612305, |
| "TensorOpTransform": 0.01923346519470215, |
| "TensorizerLegalizationPass": 5.2999999752501026e-05, |
| "TileCCOps": 0.005507707595825195, |
| "TilingProfiler": 0.007405757904052734, |
| "TransformConvOp": 0.0030219554901123047, |
| "TritiumFusion": 0.05425119400024414, |
| "ValueNumbering": 0.0020017623901367188, |
| "VectorizeDMA": 0.002228975296020508, |
| "VectorizeMatMult": 0.006806135177612305, |
| "VerifySupportedOps": 3.5000000934815034e-05, |
| "WeightCoalescing": 0.008660554885864258, |
| "ZeroSizeTensorElimination": 0.00014281272888183594, |
| "algsimp": 0.0027209999971091747, |
| "batchnorm_expander": 4.099999932805076e-05, |
| "boundary-marker-removal": 1.2999998943996616e-05, |
| "call-inliner": 0.0004540000227279961, |
| "canonicalize-boundary-marker": 1.700000029813964e-05, |
| "collective-stream-id-checker": 8.000000525498763e-05, |
| "comparison-expander": 0.0005869999877177179, |
| "computation-deduplicator": 7.500000356230885e-05, |
| "conditional-to-select": 1.700000029813964e-05, |
| "config-lowering": 8.800000068731606e-05, |
| "constant-statistics": 0.0005440000095404685, |
| "constant_folding": 0.00032700004521757364, |
| "cse": 3.7000001611886546e-05, |
| "dce": 9.100000170292333e-05, |
| "dot_decomposer": 0.0013370000524446368, |
| "dynamic-slice-transpose": 1.2000000424450263e-05, |
| "eliminate-redundant-compare": 0.0003020000003743917, |
| "emit-offloaded-dropout": 3.9999998989515007e-05, |
| "flatten-call-graph": 0.0009239999344572425, |
| "fuse-send-recv": 7.79999973019585e-05, |
| "hilo::LegalizeAlias": 1.1999999514955562e-05, |
| "hilo::NeuronInstCombine": 0.00018899999849963933, |
| "hilo::NeuronOpFusion": 4.5000000682193786e-05, |
| "hilo::ReplaceTokenTypeWithU8Pass": 5.7999997807201e-05, |
| "hilo::ScheduleFusion": 0.00016099998902063817, |
| "hilo::SixtyFourHack": 6.70000008540228e-05, |
| "hilo::VerifyAliasing": 4.999999873689376e-06, |
| "hlo-mac-count": 0.0013409999664872885, |
| "hlo-verifier": 0.007716999854892492, |
| "instruction-histogram": 0.0007719999994151294, |
| "io-con-pipe-begin": 4.999999873689376e-06, |
| "io-con-pipe-end": 9.999999974752427e-07, |
| "io-layout-normalization": 0.00139999995008111, |
| "io-statistics": 6.299999949987978e-05, |
| "legalize-ccops": 3.999999989900971e-06, |
| "legalize-compare": 1.1000000085914508e-05, |
| "lower-argminmax-custom-call": 1.1000000085914508e-05, |
| "map-inline": 0.0008809999562799931, |
| "metadata-naming": 6.70000008540228e-05, |
| "mlir::detail::OpToOpPassAdaptor": 0.00020599999697878957, |
| "mlir::hlo::MhloToPyPenguin": 0.00291300006210804, |
| "mlir::mhlo::LowerComplexExtraPass": 0.00027200000477023423, |
| "mlir::mhlo::LowerComplexPass": 0.0003980000037699938, |
| "native-to-custom-softmax": 0.0007730000070296228, |
| "native-to-custom-softmax-dx": 0.0006189999985508621, |
| "operand_upcaster": 6.299999949987978e-05, |
| "opt-barrier-removal": 0.0005789999850094318, |
| "post-par-pipe-begin": 7.999999979801942e-06, |
| "post-par-pipe-end": 0.0, |
| "post-partition-simplification": 0.0017419999931007624, |
| "pre-par-pipe-begin": 1.9999999949504854e-06, |
| "pre-par-pipe-end": 0.0, |
| "pre-partition-simplification": 0.1384889930486679, |
| "replace-minimum-constant": 0.0004579999949783087, |
| "reshape-mover": 0.00011000000085914508, |
| "simplify-concat": 0.00014099999680183828, |
| "simplify-while-loops": 9.40000027185306e-05, |
| "transform-variadic-reduce": 8.100000559352338e-05, |
| "tuple-simplifier": 0.00030600003083236516, |
| "unpack-nested-aws-ntwsr": 0.000438000017311424, |
| "unroll-while-loop": 1.8999999156221747e-05, |
| "zero_sized_hlo_elimination": 0.0008750000270083547 |
| }, |
| "hilo": { |
| "ConstantSize": 2368805.0, |
| "HloInputCount": 475.0, |
| "HloMacCount": 206469595136.0, |
| "HloOutputCount": 73.0, |
| "IfmapSize": 8266549248.0, |
| "OfmapSize": 75497472.0, |
| "OutputsReadFromCount": 0.0, |
| "PassthroughTensorsCount": 0.0, |
| "RedundantOutputCount": 0.0, |
| "Traffic": 1751252352.0 |
| }, |
| "tensorizer": { |
| "DMATilingProfiler::TotalInstructionsAfterTiling": 49538.0, |
| "StaticProfiler::AifUb": 304.240234375, |
| "StaticProfiler::ArithmeticIntensityTensorizer": 278.67474365234375, |
| "StaticProfiler::AverageDmaLength": 1974.1033935546875, |
| "StaticProfiler::DDRTransferBytes": 862646080.0, |
| "StaticProfiler::InternalTransferBytes": 669456896.0, |
| "StaticProfiler::LoadExpanded": 390679.0, |
| "StaticProfiler::StoreExpanded": 7261.0, |
| "StaticProfiler::TotalDMAExpanded": 397940.0, |
| "StaticProfiler::TotalDynamicInstancesCount": 59578.0, |
| "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 59132.0, |
| "StaticProfiler::TotalLNCComm": 0.0, |
| "StaticProfiler::TotalLNCCommTransfer": 0.0, |
| "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0, |
| "TilingProfiler::DmaInstructionsAfterTiling": 0.0, |
| "TilingProfiler::GenericInstructionsAfterTiling": 4.0, |
| "TilingProfiler::MatMultInstructionsAfterTiling": 28224.0, |
| "TilingProfiler::NumPfTransposes": 5.0, |
| "TilingProfiler::NumPfTransposesForIo": 1.0, |
| "TilingProfiler::NumPfTransposesForLocal": 1.0, |
| "TilingProfiler::NumPfTransposesForNonlocal": 3.0, |
| "TilingProfiler::PfTransposeInstructions": 19777.0, |
| "TilingProfiler::PfTransposeInstructionsForIo": 19008.0, |
| "TilingProfiler::PfTransposeInstructionsForLocal": 1.0, |
| "TilingProfiler::PfTransposeInstructionsForNonlocal": 768.0, |
| "TilingProfiler::ReduceInstructionsAfterTiling": 6.0, |
| "TilingProfiler::SimdInstructionsAfterTiling": 303.0, |
| "TilingProfiler::TotalInstructionsAfterTiling": 0.0, |
| "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0, |
| "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0, |
| "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0, |
| "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0, |
| "TransformConvOp::conv2d_column_packing": 0.0, |
| "TransformConvOp::conv2d_column_packing_1": 0.0, |
| "TransformConvOp::conv2d_column_packing_io10": 0.0, |
| "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0 |
| } |
| }, |
| "all": { |
| "compiletime": { |
| "algsimp": 0.002532999962568283, |
| "call-inliner": 0.00042600001324899495, |
| "collective-stream-id-checker": 6.70000008540228e-05, |
| "comparison-expander": 0.0005719999899156392, |
| "constant-statistics": 0.0005440000095404685, |
| "constant_folding": 0.0003000000142492354, |
| "dce": 8.800000068731606e-05, |
| "dot_decomposer": 0.0013370000524446368, |
| "eliminate-redundant-compare": 0.000291000003926456, |
| "flatten-call-graph": 0.0008929999894462526, |
| "hlo-mac-count": 0.0010870000114664435, |
| "hlo-verifier": 0.007048000115901232, |
| "instruction-histogram": 0.0007719999994151294, |
| "io-con-pipe-begin": 4.999999873689376e-06, |
| "io-con-pipe-end": 9.999999974752427e-07, |
| "io-layout-normalization": 0.00139999995008111, |
| "io-statistics": 6.299999949987978e-05, |
| "map-inline": 0.0008459999808110297, |
| "native-to-custom-softmax": 0.0006709999870508909, |
| "native-to-custom-softmax-dx": 0.0005300000193528831, |
| "opt-barrier-removal": 0.0005789999850094318, |
| "pre-par-pipe-begin": 1.9999999949504854e-06, |
| "pre-par-pipe-end": 0.0, |
| "pre-partition-simplification": 0.1384889930486679, |
| "replace-minimum-constant": 0.00041700000292621553, |
| "reshape-mover": 9.999999747378752e-05, |
| "simplify-while-loops": 8.800000068731606e-05, |
| "tuple-simplifier": 0.000291000003926456, |
| "unpack-nested-aws-ntwsr": 0.00042600001324899495, |
| "unroll-while-loop": 1.8999999156221747e-05, |
| "zero_sized_hlo_elimination": 0.0008750000270083547 |
| } |
| }, |
| "cumsum": { |
| "compiletime": { |
| "CoalesceCCOp": 0.00023508071899414063, |
| "DMALocalityOpt": 0.00017404556274414063, |
| "DMAProfiler": 0.0008785724639892578, |
| "DataStreaming": 0.0002880096435546875, |
| "DoNothing": 0.00011467933654785156, |
| "ExpandISAMacro": 0.0006787776947021484, |
| "FactorizeBlkDims": 0.0004444122314453125, |
| "InferPSumTensor": 0.0004467964172363281, |
| "LateLegalizeInst": 0.000461578369140625, |
| "LateNeuronInstComb": 0.0004818439483642578, |
| "LegalizeSundaAccess": 0.0016222000122070313, |
| "LegalizeType": 0.0002703666687011719, |
| "LowerBroadcast": 0.00025391578674316406, |
| "LowerIntrinsics": 0.00021457672119140625, |
| "LowerTranspose": 0.00024318695068359375, |
| "NeuronInstComb": 0.00048065185546875, |
| "NeuronLICM": 0.00038552284240722656, |
| "NeuronSimplifyPredicates": 0.0027823448181152344, |
| "NeuronValueNumbering": 0.00043129920959472656, |
| "SFKVectorizer": 0.003134012222290039, |
| "SimpleAllReduceTiling": 0.00022721290588378906, |
| "SimplifyNeuronTensor": 0.0005092620849609375, |
| "SpillPSum": 0.0005443096160888672, |
| "WeightCoalescing": 0.00020051002502441406 |
| } |
| }, |
| "sg00": { |
| "compiletime": { |
| "CanonicalizeConv": 9.999999974752427e-07, |
| "CanonicalizeForTensorizer": 1.700000029813964e-05, |
| "Canonicalizer": 0.00033599999733269215, |
| "HoistCompute": 3.000000106112566e-06, |
| "IdentifyCrossPassTensors": 1.5999999959603883e-05, |
| "MemcastMotion": 1.1000000085914508e-05, |
| "PenguinizeFunctions": 1.8000000636675395e-05, |
| "PruneFunctions": 1.4000000192027073e-05, |
| "RemoveOptimizationBarriers": 1.2999999853491317e-05, |
| "ScatterMotion": 2.4000000848900527e-05, |
| "TensorizerLegalizationPass": 2.700000004551839e-05, |
| "VerifySupportedOps": 1.2000000424450263e-05, |
| "algsimp": 6.500000017695129e-05, |
| "batchnorm_expander": 1.4000000192027073e-05, |
| "boundary-marker-removal": 3.999999989900971e-06, |
| "call-inliner": 9.000000318337698e-06, |
| "canonicalize-boundary-marker": 6.000000212225132e-06, |
| "collective-stream-id-checker": 3.999999989900971e-06, |
| "comparison-expander": 4.999999873689376e-06, |
| "computation-deduplicator": 2.300000051036477e-05, |
| "conditional-to-select": 4.999999873689376e-06, |
| "config-lowering": 3.9999998989515007e-05, |
| "constant_folding": 9.000000318337698e-06, |
| "cse": 1.2999999853491317e-05, |
| "dce": 9.999999974752427e-07, |
| "dynamic-slice-transpose": 3.999999989900971e-06, |
| "eliminate-redundant-compare": 3.999999989900971e-06, |
| "emit-offloaded-dropout": 1.4000000192027073e-05, |
| "flatten-call-graph": 9.999999747378752e-06, |
| "fuse-send-recv": 2.8000000384054147e-05, |
| "hilo::LegalizeAlias": 4.999999873689376e-06, |
| "hilo::NeuronInstCombine": 8.499999967170879e-05, |
| "hilo::NeuronOpFusion": 2.700000004551839e-05, |
| "hilo::ReplaceTokenTypeWithU8Pass": 1.4999999621068127e-05, |
| "hilo::ScheduleFusion": 9.999999974752427e-07, |
| "hilo::SixtyFourHack": 1.2999999853491317e-05, |
| "hilo::VerifyAliasing": 1.9999999949504854e-06, |
| "hlo-mac-count": 3.099999958067201e-05, |
| "hlo-verifier": 0.0002530000056140125, |
| "legalize-ccops": 9.999999974752427e-07, |
| "legalize-compare": 3.999999989900971e-06, |
| "lower-argminmax-custom-call": 3.999999989900971e-06, |
| "map-inline": 1.2000000424450263e-05, |
| "metadata-naming": 2.4000000848900527e-05, |
| "mlir::detail::OpToOpPassAdaptor": 2.2000000171829015e-05, |
| "mlir::hlo::MhloToPyPenguin": 0.0010389999952167273, |
| "mlir::mhlo::LowerComplexExtraPass": 8.800000068731606e-05, |
| "mlir::mhlo::LowerComplexPass": 0.00014200000441633165, |
| "native-to-custom-softmax": 9.000000136438757e-05, |
| "native-to-custom-softmax-dx": 4.3000000005122274e-05, |
| "operand_upcaster": 2.300000051036477e-05, |
| "post-par-pipe-begin": 3.000000106112566e-06, |
| "post-par-pipe-end": 0.0, |
| "post-partition-simplification": 0.0006249999860301614, |
| "replace-minimum-constant": 2.5999999706982635e-05, |
| "reshape-mover": 3.999999989900971e-06, |
| "simplify-concat": 4.8000001697801054e-05, |
| "simplify-while-loops": 1.9999999949504854e-06, |
| "transform-variadic-reduce": 9.000000318337698e-06, |
| "tuple-simplifier": 4.999999873689376e-06, |
| "unpack-nested-aws-ntwsr": 3.999999989900971e-06, |
| "unroll-while-loop": 0.0 |
| }, |
| "hilo": { |
| "ArithmeticIntensity": 73.02900695800781, |
| "ConstantSize": 2368805.0, |
| "HloInputCount": 475.0, |
| "HloMacCount": 25769803776.0, |
| "HloOutputCount": 73.0, |
| "IfmapSize": 8266549248.0, |
| "OfmapSize": 75497472.0, |
| "OutputsReadFromCount": 0.0, |
| "PassthroughTensorsCount": 0.0, |
| "RedundantOutputCount": 0.0, |
| "Traffic": 705741632.0 |
| } |
| }, |
| "sg0000": { |
| "compiletime": { |
| "AGOrderingAnalysisPass": 0.0818486213684082, |
| "AffinePredicateResolution": 0.001665353775024414, |
| "AliasDependencyElimination": 0.00012683868408203125, |
| "AliasDependencyInduction": 0.008559942245483398, |
| "AliasDependencyReset": 0.03254294395446777, |
| "BFComputeCutting": 0.003969907760620117, |
| "BirCodeGenLoop": 0.06339025497436523, |
| "CCOpFusion": 0.029911041259765625, |
| "CanonicalizeDAGForPGTiling": 0.003092050552368164, |
| "CanonicalizeIR": 0.002637147903442383, |
| "CoalesceCCOp": 0.0051479339599609375, |
| "CommuteConcat": 0.001478433609008789, |
| "DMALocalityOpt": 0.0016412734985351563, |
| "DMAProfiler": 0.004613637924194336, |
| "DMATilingProfiler": 0.004850864410400391, |
| "DataLocalityOpt": 0.11357831954956055, |
| "DataStreaming": 0.0061092376708984375, |
| "DeConcat": 0.0013332366943359375, |
| "DeadCodeElimination": 0.0018727779388427734, |
| "DeadStoreElimination": 0.03094482421875, |
| "DelinearIndices": 0.008640289306640625, |
| "Delinearization": 0.0035429000854492188, |
| "DoNothing": 8.106231689453125e-05, |
| "DramToDramTranspose": 0.03549051284790039, |
| "DumpGraphAndMetadata": 0.005577564239501953, |
| "EliminateDivs": 0.003966331481933594, |
| "ExpandBatchNorm": 0.0017447471618652344, |
| "ExpandISAMacro": 0.002687692642211914, |
| "FactorizeBlkDims": 0.026469945907592773, |
| "FactorizeThreadAxesInFreeDims": 0.0014863014221191406, |
| "FlattenMacroLoop": 0.00392913818359375, |
| "GenericAccessSimplifier": 0.0018973350524902344, |
| "InferInitValue": 0.03517007827758789, |
| "InferIntrinsicOnCC": 0.010237932205200195, |
| "InferNeuronTensor": 0.051462411880493164, |
| "InferNonlocalTensors": 0.14991235733032227, |
| "InferPSumTensor": 0.053685903549194336, |
| "InlineNativeKernels": 0.002433300018310547, |
| "InsertIOTransposes": 0.015550613403320313, |
| "InsertLocalTransposes": 0.007843017578125, |
| "InsertOffloadedTransposes": 0.002854585647583008, |
| "LICM": 0.003381490707397461, |
| "LateLegalizeInst": 0.0069310665130615234, |
| "LateLegalizePostSplit": 0.00308990478515625, |
| "LateLowerReshapeOp": 0.0017940998077392578, |
| "LateLowerTensorOp": 0.005001068115234375, |
| "LateNeuronInstComb": 0.016704320907592773, |
| "LayoutPreprocessing": 0.033296823501586914, |
| "LayoutPreprocessingAndAnalysis": 0.12302517890930176, |
| "LayoutRequirementAnalysis": 0.007364988327026367, |
| "LegalizeCCOpLayout": 0.0029296875, |
| "LegalizeOpLevelAlias": 0.0016987323760986328, |
| "LegalizePartitionReduce": 0.0014727115631103516, |
| "LegalizeSundaAccess": 0.04025077819824219, |
| "LegalizeSundaMacro": 0.009906291961669922, |
| "LegalizeType": 0.004493236541748047, |
| "LocalLayoutOpt": 0.017308473587036133, |
| "LoopFusion": 0.005831241607666016, |
| "LoopSplitting": 0.00037789344787597656, |
| "LowerBroadcast": 0.0016851425170898438, |
| "LowerCCOpBlockAxis": 0.005655765533447266, |
| "LowerComplexBroadcast": 0.0020987987518310547, |
| "LowerIntrinsics": 0.040236473083496094, |
| "LowerTensorOp": 0.012641191482543945, |
| "LowerTranspose": 0.0125579833984375, |
| "MacroGeneration": 0.08074021339416504, |
| "MaskPropagation": 0.005038022994995117, |
| "MemcpyElimination": 0.10875082015991211, |
| "MutateDataType": 0.0013315677642822266, |
| "NeuronAliasDependencyInduction": 0.00025200843811035156, |
| "NeuronAliasDependencyReset": 0.021958112716674805, |
| "NeuronInstComb": 0.009703636169433594, |
| "NeuronLICM": 0.011526823043823242, |
| "NeuronLoopFusion": 0.017663955688476563, |
| "NeuronLoopInterchange": 0.002567291259765625, |
| "NeuronSimplifier": 0.011670589447021484, |
| "NeuronSimplifyPredicates": 0.017385244369506836, |
| "NeuronValueNumbering": 0.004181623458862305, |
| "OptimizeAliasedCopyChain": 0.0017867088317871094, |
| "OptimizeNKIKernels": 0.0020456314086914063, |
| "PAGLayoutOpt": 0.3681519031524658, |
| "PComputeCutting": 0.008620262145996094, |
| "PGLayoutTilingPipeline": 1.3210320472717285, |
| "PGTiling": 0.27039527893066406, |
| "PadElimination": 0.0003745555877685547, |
| "ParAxesAnnotation": 0.33005595207214355, |
| "PartialLoopFusion": 0.026912212371826172, |
| "PartialSimdFusion": 0.03544425964355469, |
| "PerfectLoopNest": 0.0021703243255615234, |
| "RecognizeOpIdiom": 0.004334926605224609, |
| "Recompute": 0.0002522468566894531, |
| "RelaxPredicates": 0.004270076751708984, |
| "Rematerialization": 0.005487918853759766, |
| "ReshapeWeights": 0.0006825923919677734, |
| "ResolveAccessConflict": 0.003779888153076172, |
| "ResolveComplicatePredicates": 0.0018131732940673828, |
| "RewriteReplicationMatmul": 0.002633333206176758, |
| "RewriteWeights": 0.0036499500274658203, |
| "SFKVectorizer": 0.2772994041442871, |
| "SimpleAllReduceTiling": 0.002454519271850586, |
| "Simplifier": 0.0045070648193359375, |
| "SimplifyMacroPredicates": 0.016190290451049805, |
| "SimplifyNeuronTensor": 0.01452183723449707, |
| "SimplifySlice": 0.0010039806365966797, |
| "SimplifyTensor": 0.00657200813293457, |
| "SpillPSum": 0.02208685874938965, |
| "SplitAPUnionSets": 0.04095458984375, |
| "SplitAccGrp": 0.0018160343170166016, |
| "StaticProfiler": 0.004816770553588867, |
| "StaticTransposeLocalTensor": 0.004886150360107422, |
| "SundaISel": 0.04611611366271973, |
| "TCTransform": 0.001667022705078125, |
| "TensorInitialization": 0.022374629974365234, |
| "TensorOpSimplifier": 0.006697177886962891, |
| "TensorOpTransform": 0.02793574333190918, |
| "TileCCOps": 0.007641792297363281, |
| "TilingProfiler": 0.015750885009765625, |
| "TransformConvOp": 0.0026845932006835938, |
| "TritiumFusion": 0.08186149597167969, |
| "ValueNumbering": 0.0026755332946777344, |
| "VectorizeDMA": 0.007223367691040039, |
| "VectorizeMatMult": 0.018305540084838867, |
| "WeightCoalescing": 0.003328561782836914, |
| "ZeroSizeTensorElimination": 0.00011229515075683594 |
| }, |
| "tensorizer": { |
| "DMATilingProfiler::TotalInstructionsAfterTiling": 5862.0, |
| "StaticProfiler::AifUb": 88.59026336669922, |
| "StaticProfiler::ArithmeticIntensityTensorizer": 582.7418823242188, |
| "StaticProfiler::AverageDmaLength": 2248.2685546875, |
| "StaticProfiler::AverageFractalPeUtilization": 99.96076202392578, |
| "StaticProfiler::AveragePartitionUtilization": 99.90216827392578, |
| "StaticProfiler::AveragePeUtilization": 99.8394546508789, |
| "StaticProfiler::DDRTransferBytes": 104424704.0, |
| "StaticProfiler::InternalTransferBytes": 122421248.0, |
| "StaticProfiler::LoadExpanded": 25346.0, |
| "StaticProfiler::LocalizationEfficiency": 657.7944946289063, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 915.0787353515625, |
| "StaticProfiler::StoreExpanded": 10753.0, |
| "StaticProfiler::TotalDMAExpanded": 36099.0, |
| "StaticProfiler::TotalDynamicInstancesCount": 8866.0, |
| "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 8860.0, |
| "StaticProfiler::TotalLNCComm": 0.0, |
| "StaticProfiler::TotalLNCCommTransfer": 0.0, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0, |
| "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0, |
| "TilingProfiler::DmaInstructionsAfterTiling": 0.0, |
| "TilingProfiler::GenericInstructionsAfterTiling": 96.0, |
| "TilingProfiler::MatMultInstructionsAfterTiling": 3080.0, |
| "TilingProfiler::NumPfTransposes": 8.0, |
| "TilingProfiler::NumPfTransposesForIo": 1.0, |
| "TilingProfiler::NumPfTransposesForLocal": 6.0, |
| "TilingProfiler::NumPfTransposesForNonlocal": 1.0, |
| "TilingProfiler::PfTransposeInstructions": 1760.0, |
| "TilingProfiler::PfTransposeInstructionsForIo": 256.0, |
| "TilingProfiler::PfTransposeInstructionsForLocal": 1376.0, |
| "TilingProfiler::PfTransposeInstructionsForNonlocal": 128.0, |
| "TilingProfiler::ReduceInstructionsAfterTiling": 0.0, |
| "TilingProfiler::SimdInstructionsAfterTiling": 649.0, |
| "TilingProfiler::TotalInstructionsAfterTiling": 0.0, |
| "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0, |
| "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0, |
| "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0, |
| "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0, |
| "TransformConvOp::conv2d_column_packing": 0.0, |
| "TransformConvOp::conv2d_column_packing_1": 0.0, |
| "TransformConvOp::conv2d_column_packing_io10": 0.0, |
| "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0 |
| } |
| }, |
| "sg0001": { |
| "compiletime": { |
| "AGOrderingAnalysisPass": 0.03383040428161621, |
| "AffinePredicateResolution": 0.0015320777893066406, |
| "AliasDependencyElimination": 0.0001316070556640625, |
| "AliasDependencyInduction": 0.00819253921508789, |
| "AliasDependencyReset": 0.02862405776977539, |
| "BFComputeCutting": 0.004217624664306641, |
| "BirCodeGenLoop": 0.0443270206451416, |
| "CCOpFusion": 0.04336118698120117, |
| "CanonicalizeDAGForPGTiling": 0.0031616687774658203, |
| "CanonicalizeIR": 0.0021500587463378906, |
| "CoalesceCCOp": 0.005389690399169922, |
| "CommuteConcat": 0.0024237632751464844, |
| "DMALocalityOpt": 0.002274751663208008, |
| "DMAProfiler": 0.003973484039306641, |
| "DMATilingProfiler": 0.005924701690673828, |
| "DataLocalityOpt": 0.15027260780334473, |
| "DataStreaming": 0.004762887954711914, |
| "DeConcat": 0.0018739700317382813, |
| "DeadCodeElimination": 0.001882314682006836, |
| "DeadStoreElimination": 0.03486776351928711, |
| "DelinearIndices": 0.009628534317016602, |
| "Delinearization": 0.0037381649017333984, |
| "DoNothing": 6.985664367675781e-05, |
| "DramToDramTranspose": 0.04212188720703125, |
| "DumpGraphAndMetadata": 0.004312038421630859, |
| "EliminateDivs": 0.005432844161987305, |
| "ExpandBatchNorm": 0.002119302749633789, |
| "ExpandISAMacro": 0.0024309158325195313, |
| "FactorizeBlkDims": 0.02235579490661621, |
| "FactorizeThreadAxesInFreeDims": 0.0018169879913330078, |
| "FlattenMacroLoop": 0.0030968189239501953, |
| "GenericAccessSimplifier": 0.0016777515411376953, |
| "InferInitValue": 0.043079376220703125, |
| "InferIntrinsicOnCC": 0.009890556335449219, |
| "InferNeuronTensor": 0.05600404739379883, |
| "InferNonlocalTensors": 0.03101515769958496, |
| "InferPSumTensor": 0.04645681381225586, |
| "InlineNativeKernels": 0.0015399456024169922, |
| "InsertIOTransposes": 0.02417731285095215, |
| "InsertLocalTransposes": 0.0070497989654541016, |
| "InsertOffloadedTransposes": 0.003525972366333008, |
| "LICM": 0.0035805702209472656, |
| "LateLegalizeInst": 0.0041539669036865234, |
| "LateLegalizePostSplit": 0.0027403831481933594, |
| "LateLowerReshapeOp": 0.0014560222625732422, |
| "LateLowerTensorOp": 0.004617452621459961, |
| "LateNeuronInstComb": 0.015344619750976563, |
| "LayoutPreprocessing": 0.030884981155395508, |
| "LayoutPreprocessingAndAnalysis": 0.06435275077819824, |
| "LayoutRequirementAnalysis": 0.007463693618774414, |
| "LegalizeCCOpLayout": 0.002064943313598633, |
| "LegalizeOpLevelAlias": 0.0011925697326660156, |
| "LegalizePartitionReduce": 0.0026116371154785156, |
| "LegalizeSundaAccess": 0.015822887420654297, |
| "LegalizeSundaMacro": 0.012560844421386719, |
| "LegalizeType": 0.004744291305541992, |
| "LocalLayoutOpt": 0.023772239685058594, |
| "LoopFusion": 0.0066835880279541016, |
| "LoopSplitting": 0.0003638267517089844, |
| "LowerBroadcast": 0.002238750457763672, |
| "LowerCCOpBlockAxis": 0.005678653717041016, |
| "LowerComplexBroadcast": 0.0019271373748779297, |
| "LowerIntrinsics": 0.042801856994628906, |
| "LowerTensorOp": 0.012106895446777344, |
| "LowerTranspose": 0.012960433959960938, |
| "MacroGeneration": 0.12800955772399902, |
| "MaskPropagation": 0.0031516551971435547, |
| "MemcpyElimination": 0.10379505157470703, |
| "MutateDataType": 0.0014393329620361328, |
| "NeuronAliasDependencyInduction": 0.00022101402282714844, |
| "NeuronAliasDependencyReset": 0.020102262496948242, |
| "NeuronInstComb": 0.009283781051635742, |
| "NeuronLICM": 0.009867429733276367, |
| "NeuronLoopFusion": 0.022713661193847656, |
| "NeuronLoopInterchange": 0.002709627151489258, |
| "NeuronSimplifier": 0.01328134536743164, |
| "NeuronSimplifyPredicates": 0.001683950424194336, |
| "NeuronValueNumbering": 0.0033235549926757813, |
| "OptimizeAliasedCopyChain": 0.0007724761962890625, |
| "OptimizeNKIKernels": 0.001729726791381836, |
| "PAGLayoutOpt": 0.13172507286071777, |
| "PComputeCutting": 0.007474422454833984, |
| "PGLayoutTilingPipeline": 0.9329550266265869, |
| "PGTiling": 0.4518747329711914, |
| "PadElimination": 0.00040411949157714844, |
| "ParAxesAnnotation": 0.0915369987487793, |
| "PartialLoopFusion": 0.020573854446411133, |
| "PartialSimdFusion": 0.04284977912902832, |
| "PerfectLoopNest": 0.002377033233642578, |
| "RecognizeOpIdiom": 0.0049991607666015625, |
| "Recompute": 0.00026345252990722656, |
| "RelaxPredicates": 0.0034220218658447266, |
| "Rematerialization": 0.0021615028381347656, |
| "ReshapeWeights": 0.0007557868957519531, |
| "ResolveAccessConflict": 0.004181861877441406, |
| "ResolveComplicatePredicates": 0.0015151500701904297, |
| "RewriteReplicationMatmul": 0.0020759105682373047, |
| "RewriteWeights": 0.0036649703979492188, |
| "SFKVectorizer": 0.20148277282714844, |
| "SimpleAllReduceTiling": 0.003732442855834961, |
| "Simplifier": 0.004697084426879883, |
| "SimplifyMacroPredicates": 0.007361888885498047, |
| "SimplifyNeuronTensor": 0.009825944900512695, |
| "SimplifySlice": 0.0017888545989990234, |
| "SimplifyTensor": 0.006832122802734375, |
| "SpillPSum": 0.022799968719482422, |
| "SplitAPUnionSets": 0.020108938217163086, |
| "SplitAccGrp": 0.0015766620635986328, |
| "StaticProfiler": 0.004146099090576172, |
| "StaticTransposeLocalTensor": 0.004926919937133789, |
| "SundaISel": 0.04472494125366211, |
| "TCTransform": 0.0018138885498046875, |
| "TensorInitialization": 0.004791736602783203, |
| "TensorOpSimplifier": 0.0064849853515625, |
| "TensorOpTransform": 0.0333099365234375, |
| "TileCCOps": 0.0056035518646240234, |
| "TilingProfiler": 0.01600933074951172, |
| "TransformConvOp": 0.002446413040161133, |
| "TritiumFusion": 0.1239166259765625, |
| "ValueNumbering": 0.0030901432037353516, |
| "VectorizeDMA": 0.0017311573028564453, |
| "VectorizeMatMult": 0.018932580947875977, |
| "WeightCoalescing": 0.0027513504028320313, |
| "ZeroSizeTensorElimination": 0.00011587142944335938 |
| }, |
| "tensorizer": { |
| "DMATilingProfiler::TotalInstructionsAfterTiling": 15811.0, |
| "StaticProfiler::AifUb": 934.4357299804688, |
| "StaticProfiler::ArithmeticIntensityTensorizer": 708.8487548828125, |
| "StaticProfiler::AverageDmaLength": 1109.3380126953125, |
| "StaticProfiler::AverageFractalPeUtilization": 100.0, |
| "StaticProfiler::AveragePartitionUtilization": 99.8372802734375, |
| "StaticProfiler::AveragePeUtilization": 100.0, |
| "StaticProfiler::DDRTransferBytes": 306283520.0, |
| "StaticProfiler::InternalTransferBytes": 104595456.0, |
| "StaticProfiler::LoadExpanded": 257536.0, |
| "StaticProfiler::LocalizationEfficiency": 75.85848236083984, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 85.1915054321289, |
| "StaticProfiler::StoreExpanded": 10241.0, |
| "StaticProfiler::TotalDMAExpanded": 267777.0, |
| "StaticProfiler::TotalDynamicInstancesCount": 19667.0, |
| "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 19667.0, |
| "StaticProfiler::TotalLNCComm": 0.0, |
| "StaticProfiler::TotalLNCCommTransfer": 0.0, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0, |
| "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0, |
| "TilingProfiler::DmaInstructionsAfterTiling": 0.0, |
| "TilingProfiler::GenericInstructionsAfterTiling": 64.0, |
| "TilingProfiler::MatMultInstructionsAfterTiling": 12288.0, |
| "TilingProfiler::NumPfTransposes": 9.0, |
| "TilingProfiler::NumPfTransposesForIo": 3.0, |
| "TilingProfiler::NumPfTransposesForLocal": 4.0, |
| "TilingProfiler::NumPfTransposesForNonlocal": 2.0, |
| "TilingProfiler::PfTransposeInstructions": 1904.0, |
| "TilingProfiler::PfTransposeInstructionsForIo": 272.0, |
| "TilingProfiler::PfTransposeInstructionsForLocal": 1120.0, |
| "TilingProfiler::PfTransposeInstructionsForNonlocal": 512.0, |
| "TilingProfiler::ReduceInstructionsAfterTiling": 0.0, |
| "TilingProfiler::SimdInstructionsAfterTiling": 704.0, |
| "TilingProfiler::TotalInstructionsAfterTiling": 0.0, |
| "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0, |
| "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0, |
| "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0, |
| "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0, |
| "TransformConvOp::conv2d_column_packing": 0.0, |
| "TransformConvOp::conv2d_column_packing_1": 0.0, |
| "TransformConvOp::conv2d_column_packing_io10": 0.0, |
| "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0 |
| } |
| }, |
| "sg0002": { |
| "compiletime": { |
| "AGOrderingAnalysisPass": 0.01837611198425293, |
| "AffinePredicateResolution": 0.0011184215545654297, |
| "AliasDependencyElimination": 0.00015664100646972656, |
| "AliasDependencyInduction": 0.005170583724975586, |
| "AliasDependencyReset": 0.027508020401000977, |
| "BFComputeCutting": 0.0036101341247558594, |
| "BirCodeGenLoop": 0.4774467945098877, |
| "CCOpFusion": 0.033265113830566406, |
| "CanonicalizeDAGForPGTiling": 0.004282712936401367, |
| "CanonicalizeIR": 0.0024569034576416016, |
| "CoalesceCCOp": 0.013993978500366211, |
| "CommuteConcat": 0.0017316341400146484, |
| "DMALocalityOpt": 0.005455970764160156, |
| "DMAProfiler": 0.012103080749511719, |
| "DMATilingProfiler": 0.0037560462951660156, |
| "DataLocalityOpt": 0.07645320892333984, |
| "DataStreaming": 0.03701519966125488, |
| "DeConcat": 0.0018520355224609375, |
| "DeadCodeElimination": 0.0020148754119873047, |
| "DeadStoreElimination": 0.006912708282470703, |
| "DelinearIndices": 0.004647254943847656, |
| "Delinearization": 0.003908872604370117, |
| "DoNothing": 7.414817810058594e-05, |
| "DramToDramTranspose": 0.02015542984008789, |
| "DumpGraphAndMetadata": 0.08691883087158203, |
| "EliminateDivs": 0.0025060176849365234, |
| "ExpandBatchNorm": 0.0027189254760742188, |
| "ExpandISAMacro": 0.010967254638671875, |
| "FactorizeBlkDims": 0.009678840637207031, |
| "FactorizeThreadAxesInFreeDims": 0.0023202896118164063, |
| "FlattenMacroLoop": 0.00232696533203125, |
| "GenericAccessSimplifier": 0.0008094310760498047, |
| "InferInitValue": 0.02833867073059082, |
| "InferIntrinsicOnCC": 0.008923768997192383, |
| "InferNeuronTensor": 0.025766372680664063, |
| "InferNonlocalTensors": 0.014599800109863281, |
| "InferPSumTensor": 0.28374218940734863, |
| "InlineNativeKernels": 0.00860905647277832, |
| "InsertIOTransposes": 0.01989889144897461, |
| "InsertLocalTransposes": 0.004229307174682617, |
| "InsertOffloadedTransposes": 0.0029871463775634766, |
| "LICM": 0.0030870437622070313, |
| "LateLegalizeInst": 0.01364445686340332, |
| "LateLegalizePostSplit": 0.014872312545776367, |
| "LateLowerReshapeOp": 0.0010464191436767578, |
| "LateLowerTensorOp": 0.002707242965698242, |
| "LateNeuronInstComb": 0.01008152961730957, |
| "LayoutPreprocessing": 0.026853561401367188, |
| "LayoutPreprocessingAndAnalysis": 0.0556035041809082, |
| "LayoutRequirementAnalysis": 0.004946470260620117, |
| "LegalizeCCOpLayout": 0.0025353431701660156, |
| "LegalizeOpLevelAlias": 0.0018966197967529297, |
| "LegalizePartitionReduce": 0.0017490386962890625, |
| "LegalizeSundaAccess": 0.0763850212097168, |
| "LegalizeSundaMacro": 0.012125253677368164, |
| "LegalizeType": 0.012414693832397461, |
| "LocalLayoutOpt": 0.013860225677490234, |
| "LoopFusion": 0.005201578140258789, |
| "LoopSplitting": 0.0003204345703125, |
| "LowerBroadcast": 0.0018322467803955078, |
| "LowerCCOpBlockAxis": 0.0040171146392822266, |
| "LowerComplexBroadcast": 0.002280712127685547, |
| "LowerIntrinsics": 0.3141806125640869, |
| "LowerTensorOp": 0.01141357421875, |
| "LowerTranspose": 0.012679815292358398, |
| "MacroGeneration": 0.034410953521728516, |
| "MaskPropagation": 0.0028192996978759766, |
| "MemcpyElimination": 0.02788853645324707, |
| "MutateDataType": 0.0012311935424804688, |
| "NeuronAliasDependencyInduction": 0.0001773834228515625, |
| "NeuronAliasDependencyReset": 0.024976015090942383, |
| "NeuronInstComb": 0.004675865173339844, |
| "NeuronLICM": 0.03631091117858887, |
| "NeuronLoopFusion": 0.008457422256469727, |
| "NeuronLoopInterchange": 0.001413106918334961, |
| "NeuronSimplifier": 0.007856369018554688, |
| "NeuronSimplifyPredicates": 0.11957573890686035, |
| "NeuronValueNumbering": 0.004334449768066406, |
| "OptimizeAliasedCopyChain": 0.0006341934204101563, |
| "OptimizeNKIKernels": 0.38834357261657715, |
| "PAGLayoutOpt": 0.0889735221862793, |
| "PComputeCutting": 0.005109071731567383, |
| "PGLayoutTilingPipeline": 0.6248171329498291, |
| "PGTiling": 0.1645822525024414, |
| "PadElimination": 0.0003485679626464844, |
| "ParAxesAnnotation": 0.05196070671081543, |
| "PartialLoopFusion": 0.011112451553344727, |
| "PartialSimdFusion": 0.012138128280639648, |
| "PerfectLoopNest": 0.002288341522216797, |
| "RecognizeOpIdiom": 0.0041277408599853516, |
| "Recompute": 0.00026416778564453125, |
| "RelaxPredicates": 0.01356959342956543, |
| "Rematerialization": 0.0024864673614501953, |
| "ReshapeWeights": 0.0007522106170654297, |
| "ResolveAccessConflict": 0.0048482418060302734, |
| "ResolveComplicatePredicates": 0.0015094280242919922, |
| "RewriteReplicationMatmul": 0.0015668869018554688, |
| "RewriteWeights": 0.0027174949645996094, |
| "SFKVectorizer": 0.27501797676086426, |
| "SimpleAllReduceTiling": 0.009322166442871094, |
| "Simplifier": 0.003630399703979492, |
| "SimplifyMacroPredicates": 0.011396646499633789, |
| "SimplifyNeuronTensor": 1.0555970668792725, |
| "SimplifySlice": 0.0023348331451416016, |
| "SimplifyTensor": 0.005601167678833008, |
| "SpillPSum": 0.013073921203613281, |
| "SplitAPUnionSets": 0.11336159706115723, |
| "SplitAccGrp": 0.001394510269165039, |
| "StaticProfiler": 0.014252662658691406, |
| "StaticTransposeLocalTensor": 0.003930330276489258, |
| "SundaISel": 0.04436635971069336, |
| "TCTransform": 0.0008757114410400391, |
| "TensorInitialization": 0.01558232307434082, |
| "TensorOpSimplifier": 0.004608869552612305, |
| "TensorOpTransform": 0.01923346519470215, |
| "TileCCOps": 0.005507707595825195, |
| "TilingProfiler": 0.007405757904052734, |
| "TransformConvOp": 0.0030219554901123047, |
| "TritiumFusion": 0.05425119400024414, |
| "ValueNumbering": 0.0020017623901367188, |
| "VectorizeDMA": 0.002228975296020508, |
| "VectorizeMatMult": 0.006806135177612305, |
| "WeightCoalescing": 0.008460044860839844, |
| "ZeroSizeTensorElimination": 0.00014281272888183594 |
| }, |
| "tensorizer": { |
| "DMATilingProfiler::TotalInstructionsAfterTiling": 49538.0, |
| "StaticProfiler::AifUb": 304.240234375, |
| "StaticProfiler::ArithmeticIntensityTensorizer": 278.67474365234375, |
| "StaticProfiler::AverageDmaLength": 1974.1033935546875, |
| "StaticProfiler::AverageFractalPeUtilization": 99.7004623413086, |
| "StaticProfiler::AveragePartitionUtilization": 97.94140625, |
| "StaticProfiler::AveragePeUtilization": 98.78884887695313, |
| "StaticProfiler::DDRTransferBytes": 862646080.0, |
| "StaticProfiler::InternalTransferBytes": 669456896.0, |
| "StaticProfiler::LoadExpanded": 390679.0, |
| "StaticProfiler::LocalizationEfficiency": 91.59693145751953, |
| "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 95.863037109375, |
| "StaticProfiler::StoreExpanded": 7261.0, |
| "StaticProfiler::TotalDMAExpanded": 397940.0, |
| "StaticProfiler::TotalDynamicInstancesCount": 59578.0, |
| "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 59132.0, |
| "StaticProfiler::TotalLNCComm": 0.0, |
| "StaticProfiler::TotalLNCCommTransfer": 0.0, |
| "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0, |
| "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0, |
| "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0, |
| "TilingProfiler::DmaInstructionsAfterTiling": 0.0, |
| "TilingProfiler::GenericInstructionsAfterTiling": 4.0, |
| "TilingProfiler::MatMultInstructionsAfterTiling": 28224.0, |
| "TilingProfiler::NumPfTransposes": 5.0, |
| "TilingProfiler::NumPfTransposesForIo": 1.0, |
| "TilingProfiler::NumPfTransposesForLocal": 1.0, |
| "TilingProfiler::NumPfTransposesForNonlocal": 3.0, |
| "TilingProfiler::PfTransposeInstructions": 19777.0, |
| "TilingProfiler::PfTransposeInstructionsForIo": 19008.0, |
| "TilingProfiler::PfTransposeInstructionsForLocal": 1.0, |
| "TilingProfiler::PfTransposeInstructionsForNonlocal": 768.0, |
| "TilingProfiler::ReduceInstructionsAfterTiling": 6.0, |
| "TilingProfiler::SimdInstructionsAfterTiling": 303.0, |
| "TilingProfiler::TotalInstructionsAfterTiling": 0.0, |
| "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0, |
| "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0, |
| "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0, |
| "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0, |
| "TransformConvOp::conv2d_column_packing": 0.0, |
| "TransformConvOp::conv2d_column_packing_1": 0.0, |
| "TransformConvOp::conv2d_column_packing_io10": 0.0, |
| "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0 |
| } |
| }, |
| "sg01": { |
| "compiletime": { |
| "CanonicalizeConv": 2.2000000171829015e-05, |
| "CanonicalizeForTensorizer": 1.4999999621068127e-05, |
| "Canonicalizer": 0.00025499999173916876, |
| "HoistCompute": 1.9999999949504854e-06, |
| "IdentifyCrossPassTensors": 2.499999936844688e-05, |
| "MemcastMotion": 7.000000096013537e-06, |
| "PenguinizeFunctions": 1.4999999621068127e-05, |
| "PruneFunctions": 1.8999999156221747e-05, |
| "RemoveOptimizationBarriers": 2.700000004551839e-05, |
| "ScatterMotion": 1.9999999494757503e-05, |
| "TensorizerLegalizationPass": 1.9999999494757503e-05, |
| "VerifySupportedOps": 1.1000000085914508e-05, |
| "algsimp": 6.299999949987978e-05, |
| "batchnorm_expander": 1.4000000192027073e-05, |
| "boundary-marker-removal": 4.999999873689376e-06, |
| "call-inliner": 9.000000318337698e-06, |
| "canonicalize-boundary-marker": 6.000000212225132e-06, |
| "collective-stream-id-checker": 4.999999873689376e-06, |
| "comparison-expander": 4.999999873689376e-06, |
| "computation-deduplicator": 2.5999999706982635e-05, |
| "conditional-to-select": 4.999999873689376e-06, |
| "config-lowering": 2.2000000171829015e-05, |
| "constant_folding": 9.000000318337698e-06, |
| "cse": 1.2000000424450263e-05, |
| "dce": 9.999999974752427e-07, |
| "dynamic-slice-transpose": 3.999999989900971e-06, |
| "eliminate-redundant-compare": 3.999999989900971e-06, |
| "emit-offloaded-dropout": 1.2999999853491317e-05, |
| "flatten-call-graph": 9.000000318337698e-06, |
| "fuse-send-recv": 2.9999999242136255e-05, |
| "hilo::LegalizeAlias": 4.999999873689376e-06, |
| "hilo::NeuronInstCombine": 3.600000127335079e-05, |
| "hilo::NeuronOpFusion": 1.4000000192027073e-05, |
| "hilo::ReplaceTokenTypeWithU8Pass": 2.099999983329326e-05, |
| "hilo::ScheduleFusion": 9.999999974752427e-07, |
| "hilo::SixtyFourHack": 1.4000000192027073e-05, |
| "hilo::VerifyAliasing": 1.9999999949504854e-06, |
| "hlo-mac-count": 4.600000102072954e-05, |
| "hlo-verifier": 0.00023299999884329736, |
| "legalize-ccops": 9.999999974752427e-07, |
| "legalize-compare": 3.999999989900971e-06, |
| "lower-argminmax-custom-call": 3.999999989900971e-06, |
| "map-inline": 1.1000000085914508e-05, |
| "metadata-naming": 2.700000004551839e-05, |
| "mlir::detail::OpToOpPassAdaptor": 0.00017299999308306724, |
| "mlir::hlo::MhloToPyPenguin": 0.0009840000420808792, |
| "mlir::mhlo::LowerComplexExtraPass": 9.600000339560211e-05, |
| "mlir::mhlo::LowerComplexPass": 0.00013600000238511711, |
| "native-to-custom-softmax": 6.000000212225132e-06, |
| "native-to-custom-softmax-dx": 2.2000000171829015e-05, |
| "operand_upcaster": 2.4000000848900527e-05, |
| "post-par-pipe-begin": 3.000000106112566e-06, |
| "post-par-pipe-end": 0.0, |
| "post-partition-simplification": 0.0005660000024363399, |
| "replace-minimum-constant": 6.000000212225132e-06, |
| "reshape-mover": 3.000000106112566e-06, |
| "simplify-concat": 4.8999998398358e-05, |
| "simplify-while-loops": 1.9999999949504854e-06, |
| "transform-variadic-reduce": 9.000000318337698e-06, |
| "tuple-simplifier": 4.999999873689376e-06, |
| "unpack-nested-aws-ntwsr": 3.999999989900971e-06, |
| "unroll-while-loop": 0.0 |
| }, |
| "hilo": { |
| "ArithmeticIntensity": 834.6854858398438, |
| "HloMacCount": 103079215104.0, |
| "Traffic": 246989344.0 |
| } |
| }, |
| "sg02": { |
| "compiletime": { |
| "CanonicalizeConv": 0.0, |
| "CanonicalizeForTensorizer": 1.4000000192027073e-05, |
| "Canonicalizer": 0.0003129999968223274, |
| "HoistCompute": 9.999999974752427e-07, |
| "IdentifyCrossPassTensors": 1.2000000424450263e-05, |
| "MemcastMotion": 0.0, |
| "PenguinizeFunctions": 9.999999747378752e-06, |
| "PruneFunctions": 7.999999979801942e-06, |
| "RemoveOptimizationBarriers": 9.000000318337698e-06, |
| "ScatterMotion": 3.000000106112566e-06, |
| "TensorizerLegalizationPass": 6.000000212225132e-06, |
| "VerifySupportedOps": 1.2000000424450263e-05, |
| "algsimp": 5.999999848427251e-05, |
| "batchnorm_expander": 1.2999999853491317e-05, |
| "boundary-marker-removal": 3.999999989900971e-06, |
| "call-inliner": 9.999999747378752e-06, |
| "canonicalize-boundary-marker": 4.999999873689376e-06, |
| "collective-stream-id-checker": 3.999999989900971e-06, |
| "comparison-expander": 4.999999873689376e-06, |
| "computation-deduplicator": 2.5999999706982635e-05, |
| "conditional-to-select": 7.000000096013537e-06, |
| "config-lowering": 2.5999999706982635e-05, |
| "constant_folding": 9.000000318337698e-06, |
| "cse": 1.2000000424450263e-05, |
| "dce": 9.999999974752427e-07, |
| "dynamic-slice-transpose": 3.999999989900971e-06, |
| "eliminate-redundant-compare": 3.000000106112566e-06, |
| "emit-offloaded-dropout": 1.2999999853491317e-05, |
| "flatten-call-graph": 1.2000000424450263e-05, |
| "fuse-send-recv": 1.9999999494757503e-05, |
| "hilo::LegalizeAlias": 1.9999999949504854e-06, |
| "hilo::NeuronInstCombine": 6.800000119255856e-05, |
| "hilo::NeuronOpFusion": 3.999999989900971e-06, |
| "hilo::ReplaceTokenTypeWithU8Pass": 2.2000000171829015e-05, |
| "hilo::ScheduleFusion": 0.00015900000289548188, |
| "hilo::SixtyFourHack": 3.9999998989515007e-05, |
| "hilo::VerifyAliasing": 9.999999974752427e-07, |
| "hlo-mac-count": 0.00017699999443721026, |
| "hlo-verifier": 0.0001829999964684248, |
| "legalize-ccops": 1.9999999949504854e-06, |
| "legalize-compare": 3.000000106112566e-06, |
| "lower-argminmax-custom-call": 3.000000106112566e-06, |
| "map-inline": 1.2000000424450263e-05, |
| "metadata-naming": 1.5999999959603883e-05, |
| "mlir::detail::OpToOpPassAdaptor": 1.1000000085914508e-05, |
| "mlir::hlo::MhloToPyPenguin": 0.0008900000248104334, |
| "mlir::mhlo::LowerComplexExtraPass": 8.800000068731606e-05, |
| "mlir::mhlo::LowerComplexPass": 0.00011999999696854502, |
| "native-to-custom-softmax": 6.000000212225132e-06, |
| "native-to-custom-softmax-dx": 2.4000000848900527e-05, |
| "operand_upcaster": 1.5999999959603883e-05, |
| "post-par-pipe-begin": 1.9999999949504854e-06, |
| "post-par-pipe-end": 0.0, |
| "post-partition-simplification": 0.0005510000046342611, |
| "replace-minimum-constant": 9.000000318337698e-06, |
| "reshape-mover": 3.000000106112566e-06, |
| "simplify-concat": 4.400000034365803e-05, |
| "simplify-while-loops": 1.9999999949504854e-06, |
| "transform-variadic-reduce": 6.299999949987978e-05, |
| "tuple-simplifier": 4.999999873689376e-06, |
| "unpack-nested-aws-ntwsr": 3.999999989900971e-06, |
| "unroll-while-loop": 0.0 |
| }, |
| "hilo": { |
| "ArithmeticIntensity": 194.41075134277344, |
| "HloMacCount": 77620576256.0, |
| "Traffic": 798521408.0 |
| } |
| } |
| } |