Qwen3-8BSharded / context_encoding_model /_tp0_bk3 /global_metric_store.json
jburtoft's picture
Upload folder using huggingface_hub
ee61cf7 verified
{
"Average": {
"tensorizer": {
"StaticProfiler::AverageFractalPeUtilization": 99.7004623413086,
"StaticProfiler::AveragePartitionUtilization": 97.94140625,
"StaticProfiler::AveragePeUtilization": 98.78884887695313,
"StaticProfiler::LocalizationEfficiency": 91.59693145751953,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 95.863037109375,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0
}
},
"Count": {
"tensorizer": {
"StaticProfiler::AverageFractalPeUtilization": 1.0,
"StaticProfiler::AveragePartitionUtilization": 1.0,
"StaticProfiler::AveragePeUtilization": 1.0,
"StaticProfiler::LocalizationEfficiency": 1.0,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1.0,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 1.0,
"TilingProfiler::AveragePeUtilizationAfterTiling": 1.0
}
},
"Sum": {
"compiletime": {
"AGOrderingAnalysisPass": 0.01837611198425293,
"AffinePredicateResolution": 0.0011184215545654297,
"AliasDependencyElimination": 0.00015664100646972656,
"AliasDependencyInduction": 0.005170583724975586,
"AliasDependencyReset": 0.027508020401000977,
"BFComputeCutting": 0.0036101341247558594,
"BirCodeGenLoop": 0.4774467945098877,
"CCOpFusion": 0.033265113830566406,
"CanonicalizeConv": 2.300000051036477e-05,
"CanonicalizeDAGForPGTiling": 0.004282712936401367,
"CanonicalizeForTensorizer": 4.600000102072954e-05,
"CanonicalizeIR": 0.0024569034576416016,
"Canonicalizer": 0.0009039999567903578,
"CoalesceCCOp": 0.014229059219360352,
"CommuteConcat": 0.0017316341400146484,
"DMALocalityOpt": 0.005630016326904297,
"DMAProfiler": 0.012981653213500977,
"DMATilingProfiler": 0.0037560462951660156,
"DataLocalityOpt": 0.07645320892333984,
"DataStreaming": 0.03730320930480957,
"DeConcat": 0.0018520355224609375,
"DeadCodeElimination": 0.0020148754119873047,
"DeadStoreElimination": 0.006912708282470703,
"DelinearIndices": 0.004647254943847656,
"Delinearization": 0.003908872604370117,
"DoNothing": 0.0001888275146484375,
"DramToDramTranspose": 0.02015542984008789,
"DumpGraphAndMetadata": 0.08691883087158203,
"EliminateDivs": 0.0025060176849365234,
"ExpandBatchNorm": 0.0027189254760742188,
"ExpandISAMacro": 0.011646032333374023,
"FactorizeBlkDims": 0.010123252868652344,
"FactorizeThreadAxesInFreeDims": 0.0023202896118164063,
"FlattenMacroLoop": 0.00232696533203125,
"GenericAccessSimplifier": 0.0008094310760498047,
"HoistCompute": 5.999999757477781e-06,
"IdentifyCrossPassTensors": 5.2999999752501026e-05,
"InferInitValue": 0.02833867073059082,
"InferIntrinsicOnCC": 0.008923768997192383,
"InferNeuronTensor": 0.025766372680664063,
"InferNonlocalTensors": 0.014599800109863281,
"InferPSumTensor": 0.28418898582458496,
"InlineNativeKernels": 0.00860905647277832,
"InsertIOTransposes": 0.01989889144897461,
"InsertLocalTransposes": 0.004229307174682617,
"InsertOffloadedTransposes": 0.0029871463775634766,
"LICM": 0.0030870437622070313,
"LateLegalizeInst": 0.014106035232543945,
"LateLegalizePostSplit": 0.014872312545776367,
"LateLowerReshapeOp": 0.0010464191436767578,
"LateLowerTensorOp": 0.002707242965698242,
"LateNeuronInstComb": 0.010563373565673828,
"LayoutPreprocessing": 0.026853561401367188,
"LayoutPreprocessingAndAnalysis": 0.0556035041809082,
"LayoutRequirementAnalysis": 0.004946470260620117,
"LegalizeCCOpLayout": 0.0025353431701660156,
"LegalizeOpLevelAlias": 0.0018966197967529297,
"LegalizePartitionReduce": 0.0017490386962890625,
"LegalizeSundaAccess": 0.07800722122192383,
"LegalizeSundaMacro": 0.012125253677368164,
"LegalizeType": 0.012685060501098633,
"LocalLayoutOpt": 0.013860225677490234,
"LoopFusion": 0.005201578140258789,
"LoopSplitting": 0.0003204345703125,
"LowerBroadcast": 0.002086162567138672,
"LowerCCOpBlockAxis": 0.0040171146392822266,
"LowerComplexBroadcast": 0.002280712127685547,
"LowerIntrinsics": 0.3143951892852783,
"LowerTensorOp": 0.01141357421875,
"LowerTranspose": 0.012923002243041992,
"MacroGeneration": 0.034410953521728516,
"MaskPropagation": 0.0028192996978759766,
"MemcastMotion": 1.8000000636675395e-05,
"MemcpyElimination": 0.02788853645324707,
"MutateDataType": 0.0012311935424804688,
"NeuronAliasDependencyInduction": 0.0001773834228515625,
"NeuronAliasDependencyReset": 0.024976015090942383,
"NeuronInstComb": 0.005156517028808594,
"NeuronLICM": 0.036696434020996094,
"NeuronLoopFusion": 0.008457422256469727,
"NeuronLoopInterchange": 0.001413106918334961,
"NeuronSimplifier": 0.007856369018554688,
"NeuronSimplifyPredicates": 0.12235808372497559,
"NeuronValueNumbering": 0.004765748977661133,
"OptimizeAliasedCopyChain": 0.0006341934204101563,
"OptimizeNKIKernels": 0.38834357261657715,
"PAGLayoutOpt": 0.0889735221862793,
"PComputeCutting": 0.005109071731567383,
"PGLayoutTilingPipeline": 0.6248171329498291,
"PGTiling": 0.1645822525024414,
"PadElimination": 0.0003485679626464844,
"ParAxesAnnotation": 0.05196070671081543,
"PartialLoopFusion": 0.011112451553344727,
"PartialSimdFusion": 0.012138128280639648,
"PenguinizeFunctions": 4.3000000005122274e-05,
"PerfectLoopNest": 0.002288341522216797,
"PruneFunctions": 4.099999932805076e-05,
"RecognizeOpIdiom": 0.0041277408599853516,
"Recompute": 0.00026416778564453125,
"RelaxPredicates": 0.01356959342956543,
"Rematerialization": 0.0024864673614501953,
"RemoveOptimizationBarriers": 4.900000203633681e-05,
"ReshapeWeights": 0.0007522106170654297,
"ResolveAccessConflict": 0.0048482418060302734,
"ResolveComplicatePredicates": 0.0015094280242919922,
"RewriteReplicationMatmul": 0.0015668869018554688,
"RewriteWeights": 0.0027174949645996094,
"SFKVectorizer": 0.2781519889831543,
"ScatterMotion": 4.70000013592653e-05,
"SimpleAllReduceTiling": 0.009549379348754883,
"Simplifier": 0.003630399703979492,
"SimplifyMacroPredicates": 0.011396646499633789,
"SimplifyNeuronTensor": 1.0561063289642334,
"SimplifySlice": 0.0023348331451416016,
"SimplifyTensor": 0.005601167678833008,
"SpillPSum": 0.013618230819702148,
"SplitAPUnionSets": 0.11336159706115723,
"SplitAccGrp": 0.001394510269165039,
"StaticProfiler": 0.014252662658691406,
"StaticTransposeLocalTensor": 0.003930330276489258,
"SundaISel": 0.04436635971069336,
"TCTransform": 0.0008757114410400391,
"TensorInitialization": 0.01558232307434082,
"TensorOpSimplifier": 0.004608869552612305,
"TensorOpTransform": 0.01923346519470215,
"TensorizerLegalizationPass": 5.2999999752501026e-05,
"TileCCOps": 0.005507707595825195,
"TilingProfiler": 0.007405757904052734,
"TransformConvOp": 0.0030219554901123047,
"TritiumFusion": 0.05425119400024414,
"ValueNumbering": 0.0020017623901367188,
"VectorizeDMA": 0.002228975296020508,
"VectorizeMatMult": 0.006806135177612305,
"VerifySupportedOps": 3.5000000934815034e-05,
"WeightCoalescing": 0.008660554885864258,
"ZeroSizeTensorElimination": 0.00014281272888183594,
"algsimp": 0.0027209999971091747,
"batchnorm_expander": 4.099999932805076e-05,
"boundary-marker-removal": 1.2999998943996616e-05,
"call-inliner": 0.0004540000227279961,
"canonicalize-boundary-marker": 1.700000029813964e-05,
"collective-stream-id-checker": 8.000000525498763e-05,
"comparison-expander": 0.0005869999877177179,
"computation-deduplicator": 7.500000356230885e-05,
"conditional-to-select": 1.700000029813964e-05,
"config-lowering": 8.800000068731606e-05,
"constant-statistics": 0.0005440000095404685,
"constant_folding": 0.00032700004521757364,
"cse": 3.7000001611886546e-05,
"dce": 9.100000170292333e-05,
"dot_decomposer": 0.0013370000524446368,
"dynamic-slice-transpose": 1.2000000424450263e-05,
"eliminate-redundant-compare": 0.0003020000003743917,
"emit-offloaded-dropout": 3.9999998989515007e-05,
"flatten-call-graph": 0.0009239999344572425,
"fuse-send-recv": 7.79999973019585e-05,
"hilo::LegalizeAlias": 1.1999999514955562e-05,
"hilo::NeuronInstCombine": 0.00018899999849963933,
"hilo::NeuronOpFusion": 4.5000000682193786e-05,
"hilo::ReplaceTokenTypeWithU8Pass": 5.7999997807201e-05,
"hilo::ScheduleFusion": 0.00016099998902063817,
"hilo::SixtyFourHack": 6.70000008540228e-05,
"hilo::VerifyAliasing": 4.999999873689376e-06,
"hlo-mac-count": 0.0013409999664872885,
"hlo-verifier": 0.007716999854892492,
"instruction-histogram": 0.0007719999994151294,
"io-con-pipe-begin": 4.999999873689376e-06,
"io-con-pipe-end": 9.999999974752427e-07,
"io-layout-normalization": 0.00139999995008111,
"io-statistics": 6.299999949987978e-05,
"legalize-ccops": 3.999999989900971e-06,
"legalize-compare": 1.1000000085914508e-05,
"lower-argminmax-custom-call": 1.1000000085914508e-05,
"map-inline": 0.0008809999562799931,
"metadata-naming": 6.70000008540228e-05,
"mlir::detail::OpToOpPassAdaptor": 0.00020599999697878957,
"mlir::hlo::MhloToPyPenguin": 0.00291300006210804,
"mlir::mhlo::LowerComplexExtraPass": 0.00027200000477023423,
"mlir::mhlo::LowerComplexPass": 0.0003980000037699938,
"native-to-custom-softmax": 0.0007730000070296228,
"native-to-custom-softmax-dx": 0.0006189999985508621,
"operand_upcaster": 6.299999949987978e-05,
"opt-barrier-removal": 0.0005789999850094318,
"post-par-pipe-begin": 7.999999979801942e-06,
"post-par-pipe-end": 0.0,
"post-partition-simplification": 0.0017419999931007624,
"pre-par-pipe-begin": 1.9999999949504854e-06,
"pre-par-pipe-end": 0.0,
"pre-partition-simplification": 0.1384889930486679,
"replace-minimum-constant": 0.0004579999949783087,
"reshape-mover": 0.00011000000085914508,
"simplify-concat": 0.00014099999680183828,
"simplify-while-loops": 9.40000027185306e-05,
"transform-variadic-reduce": 8.100000559352338e-05,
"tuple-simplifier": 0.00030600003083236516,
"unpack-nested-aws-ntwsr": 0.000438000017311424,
"unroll-while-loop": 1.8999999156221747e-05,
"zero_sized_hlo_elimination": 0.0008750000270083547
},
"hilo": {
"ConstantSize": 2368805.0,
"HloInputCount": 475.0,
"HloMacCount": 206469595136.0,
"HloOutputCount": 73.0,
"IfmapSize": 8266549248.0,
"OfmapSize": 75497472.0,
"OutputsReadFromCount": 0.0,
"PassthroughTensorsCount": 0.0,
"RedundantOutputCount": 0.0,
"Traffic": 1751252352.0
},
"tensorizer": {
"DMATilingProfiler::TotalInstructionsAfterTiling": 49538.0,
"StaticProfiler::AifUb": 304.240234375,
"StaticProfiler::ArithmeticIntensityTensorizer": 278.67474365234375,
"StaticProfiler::AverageDmaLength": 1974.1033935546875,
"StaticProfiler::DDRTransferBytes": 862646080.0,
"StaticProfiler::InternalTransferBytes": 669456896.0,
"StaticProfiler::LoadExpanded": 390679.0,
"StaticProfiler::StoreExpanded": 7261.0,
"StaticProfiler::TotalDMAExpanded": 397940.0,
"StaticProfiler::TotalDynamicInstancesCount": 59578.0,
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 59132.0,
"StaticProfiler::TotalLNCComm": 0.0,
"StaticProfiler::TotalLNCCommTransfer": 0.0,
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
"TilingProfiler::GenericInstructionsAfterTiling": 4.0,
"TilingProfiler::MatMultInstructionsAfterTiling": 28224.0,
"TilingProfiler::NumPfTransposes": 5.0,
"TilingProfiler::NumPfTransposesForIo": 1.0,
"TilingProfiler::NumPfTransposesForLocal": 1.0,
"TilingProfiler::NumPfTransposesForNonlocal": 3.0,
"TilingProfiler::PfTransposeInstructions": 19777.0,
"TilingProfiler::PfTransposeInstructionsForIo": 19008.0,
"TilingProfiler::PfTransposeInstructionsForLocal": 1.0,
"TilingProfiler::PfTransposeInstructionsForNonlocal": 768.0,
"TilingProfiler::ReduceInstructionsAfterTiling": 6.0,
"TilingProfiler::SimdInstructionsAfterTiling": 303.0,
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
"TransformConvOp::conv2d_column_packing": 0.0,
"TransformConvOp::conv2d_column_packing_1": 0.0,
"TransformConvOp::conv2d_column_packing_io10": 0.0,
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
}
},
"all": {
"compiletime": {
"algsimp": 0.002532999962568283,
"call-inliner": 0.00042600001324899495,
"collective-stream-id-checker": 6.70000008540228e-05,
"comparison-expander": 0.0005719999899156392,
"constant-statistics": 0.0005440000095404685,
"constant_folding": 0.0003000000142492354,
"dce": 8.800000068731606e-05,
"dot_decomposer": 0.0013370000524446368,
"eliminate-redundant-compare": 0.000291000003926456,
"flatten-call-graph": 0.0008929999894462526,
"hlo-mac-count": 0.0010870000114664435,
"hlo-verifier": 0.007048000115901232,
"instruction-histogram": 0.0007719999994151294,
"io-con-pipe-begin": 4.999999873689376e-06,
"io-con-pipe-end": 9.999999974752427e-07,
"io-layout-normalization": 0.00139999995008111,
"io-statistics": 6.299999949987978e-05,
"map-inline": 0.0008459999808110297,
"native-to-custom-softmax": 0.0006709999870508909,
"native-to-custom-softmax-dx": 0.0005300000193528831,
"opt-barrier-removal": 0.0005789999850094318,
"pre-par-pipe-begin": 1.9999999949504854e-06,
"pre-par-pipe-end": 0.0,
"pre-partition-simplification": 0.1384889930486679,
"replace-minimum-constant": 0.00041700000292621553,
"reshape-mover": 9.999999747378752e-05,
"simplify-while-loops": 8.800000068731606e-05,
"tuple-simplifier": 0.000291000003926456,
"unpack-nested-aws-ntwsr": 0.00042600001324899495,
"unroll-while-loop": 1.8999999156221747e-05,
"zero_sized_hlo_elimination": 0.0008750000270083547
}
},
"cumsum": {
"compiletime": {
"CoalesceCCOp": 0.00023508071899414063,
"DMALocalityOpt": 0.00017404556274414063,
"DMAProfiler": 0.0008785724639892578,
"DataStreaming": 0.0002880096435546875,
"DoNothing": 0.00011467933654785156,
"ExpandISAMacro": 0.0006787776947021484,
"FactorizeBlkDims": 0.0004444122314453125,
"InferPSumTensor": 0.0004467964172363281,
"LateLegalizeInst": 0.000461578369140625,
"LateNeuronInstComb": 0.0004818439483642578,
"LegalizeSundaAccess": 0.0016222000122070313,
"LegalizeType": 0.0002703666687011719,
"LowerBroadcast": 0.00025391578674316406,
"LowerIntrinsics": 0.00021457672119140625,
"LowerTranspose": 0.00024318695068359375,
"NeuronInstComb": 0.00048065185546875,
"NeuronLICM": 0.00038552284240722656,
"NeuronSimplifyPredicates": 0.0027823448181152344,
"NeuronValueNumbering": 0.00043129920959472656,
"SFKVectorizer": 0.003134012222290039,
"SimpleAllReduceTiling": 0.00022721290588378906,
"SimplifyNeuronTensor": 0.0005092620849609375,
"SpillPSum": 0.0005443096160888672,
"WeightCoalescing": 0.00020051002502441406
}
},
"sg00": {
"compiletime": {
"CanonicalizeConv": 9.999999974752427e-07,
"CanonicalizeForTensorizer": 1.700000029813964e-05,
"Canonicalizer": 0.00033599999733269215,
"HoistCompute": 3.000000106112566e-06,
"IdentifyCrossPassTensors": 1.5999999959603883e-05,
"MemcastMotion": 1.1000000085914508e-05,
"PenguinizeFunctions": 1.8000000636675395e-05,
"PruneFunctions": 1.4000000192027073e-05,
"RemoveOptimizationBarriers": 1.2999999853491317e-05,
"ScatterMotion": 2.4000000848900527e-05,
"TensorizerLegalizationPass": 2.700000004551839e-05,
"VerifySupportedOps": 1.2000000424450263e-05,
"algsimp": 6.500000017695129e-05,
"batchnorm_expander": 1.4000000192027073e-05,
"boundary-marker-removal": 3.999999989900971e-06,
"call-inliner": 9.000000318337698e-06,
"canonicalize-boundary-marker": 6.000000212225132e-06,
"collective-stream-id-checker": 3.999999989900971e-06,
"comparison-expander": 4.999999873689376e-06,
"computation-deduplicator": 2.300000051036477e-05,
"conditional-to-select": 4.999999873689376e-06,
"config-lowering": 3.9999998989515007e-05,
"constant_folding": 9.000000318337698e-06,
"cse": 1.2999999853491317e-05,
"dce": 9.999999974752427e-07,
"dynamic-slice-transpose": 3.999999989900971e-06,
"eliminate-redundant-compare": 3.999999989900971e-06,
"emit-offloaded-dropout": 1.4000000192027073e-05,
"flatten-call-graph": 9.999999747378752e-06,
"fuse-send-recv": 2.8000000384054147e-05,
"hilo::LegalizeAlias": 4.999999873689376e-06,
"hilo::NeuronInstCombine": 8.499999967170879e-05,
"hilo::NeuronOpFusion": 2.700000004551839e-05,
"hilo::ReplaceTokenTypeWithU8Pass": 1.4999999621068127e-05,
"hilo::ScheduleFusion": 9.999999974752427e-07,
"hilo::SixtyFourHack": 1.2999999853491317e-05,
"hilo::VerifyAliasing": 1.9999999949504854e-06,
"hlo-mac-count": 3.099999958067201e-05,
"hlo-verifier": 0.0002530000056140125,
"legalize-ccops": 9.999999974752427e-07,
"legalize-compare": 3.999999989900971e-06,
"lower-argminmax-custom-call": 3.999999989900971e-06,
"map-inline": 1.2000000424450263e-05,
"metadata-naming": 2.4000000848900527e-05,
"mlir::detail::OpToOpPassAdaptor": 2.2000000171829015e-05,
"mlir::hlo::MhloToPyPenguin": 0.0010389999952167273,
"mlir::mhlo::LowerComplexExtraPass": 8.800000068731606e-05,
"mlir::mhlo::LowerComplexPass": 0.00014200000441633165,
"native-to-custom-softmax": 9.000000136438757e-05,
"native-to-custom-softmax-dx": 4.3000000005122274e-05,
"operand_upcaster": 2.300000051036477e-05,
"post-par-pipe-begin": 3.000000106112566e-06,
"post-par-pipe-end": 0.0,
"post-partition-simplification": 0.0006249999860301614,
"replace-minimum-constant": 2.5999999706982635e-05,
"reshape-mover": 3.999999989900971e-06,
"simplify-concat": 4.8000001697801054e-05,
"simplify-while-loops": 1.9999999949504854e-06,
"transform-variadic-reduce": 9.000000318337698e-06,
"tuple-simplifier": 4.999999873689376e-06,
"unpack-nested-aws-ntwsr": 3.999999989900971e-06,
"unroll-while-loop": 0.0
},
"hilo": {
"ArithmeticIntensity": 73.02900695800781,
"ConstantSize": 2368805.0,
"HloInputCount": 475.0,
"HloMacCount": 25769803776.0,
"HloOutputCount": 73.0,
"IfmapSize": 8266549248.0,
"OfmapSize": 75497472.0,
"OutputsReadFromCount": 0.0,
"PassthroughTensorsCount": 0.0,
"RedundantOutputCount": 0.0,
"Traffic": 705741632.0
}
},
"sg0000": {
"compiletime": {
"AGOrderingAnalysisPass": 0.0818486213684082,
"AffinePredicateResolution": 0.001665353775024414,
"AliasDependencyElimination": 0.00012683868408203125,
"AliasDependencyInduction": 0.008559942245483398,
"AliasDependencyReset": 0.03254294395446777,
"BFComputeCutting": 0.003969907760620117,
"BirCodeGenLoop": 0.06339025497436523,
"CCOpFusion": 0.029911041259765625,
"CanonicalizeDAGForPGTiling": 0.003092050552368164,
"CanonicalizeIR": 0.002637147903442383,
"CoalesceCCOp": 0.0051479339599609375,
"CommuteConcat": 0.001478433609008789,
"DMALocalityOpt": 0.0016412734985351563,
"DMAProfiler": 0.004613637924194336,
"DMATilingProfiler": 0.004850864410400391,
"DataLocalityOpt": 0.11357831954956055,
"DataStreaming": 0.0061092376708984375,
"DeConcat": 0.0013332366943359375,
"DeadCodeElimination": 0.0018727779388427734,
"DeadStoreElimination": 0.03094482421875,
"DelinearIndices": 0.008640289306640625,
"Delinearization": 0.0035429000854492188,
"DoNothing": 8.106231689453125e-05,
"DramToDramTranspose": 0.03549051284790039,
"DumpGraphAndMetadata": 0.005577564239501953,
"EliminateDivs": 0.003966331481933594,
"ExpandBatchNorm": 0.0017447471618652344,
"ExpandISAMacro": 0.002687692642211914,
"FactorizeBlkDims": 0.026469945907592773,
"FactorizeThreadAxesInFreeDims": 0.0014863014221191406,
"FlattenMacroLoop": 0.00392913818359375,
"GenericAccessSimplifier": 0.0018973350524902344,
"InferInitValue": 0.03517007827758789,
"InferIntrinsicOnCC": 0.010237932205200195,
"InferNeuronTensor": 0.051462411880493164,
"InferNonlocalTensors": 0.14991235733032227,
"InferPSumTensor": 0.053685903549194336,
"InlineNativeKernels": 0.002433300018310547,
"InsertIOTransposes": 0.015550613403320313,
"InsertLocalTransposes": 0.007843017578125,
"InsertOffloadedTransposes": 0.002854585647583008,
"LICM": 0.003381490707397461,
"LateLegalizeInst": 0.0069310665130615234,
"LateLegalizePostSplit": 0.00308990478515625,
"LateLowerReshapeOp": 0.0017940998077392578,
"LateLowerTensorOp": 0.005001068115234375,
"LateNeuronInstComb": 0.016704320907592773,
"LayoutPreprocessing": 0.033296823501586914,
"LayoutPreprocessingAndAnalysis": 0.12302517890930176,
"LayoutRequirementAnalysis": 0.007364988327026367,
"LegalizeCCOpLayout": 0.0029296875,
"LegalizeOpLevelAlias": 0.0016987323760986328,
"LegalizePartitionReduce": 0.0014727115631103516,
"LegalizeSundaAccess": 0.04025077819824219,
"LegalizeSundaMacro": 0.009906291961669922,
"LegalizeType": 0.004493236541748047,
"LocalLayoutOpt": 0.017308473587036133,
"LoopFusion": 0.005831241607666016,
"LoopSplitting": 0.00037789344787597656,
"LowerBroadcast": 0.0016851425170898438,
"LowerCCOpBlockAxis": 0.005655765533447266,
"LowerComplexBroadcast": 0.0020987987518310547,
"LowerIntrinsics": 0.040236473083496094,
"LowerTensorOp": 0.012641191482543945,
"LowerTranspose": 0.0125579833984375,
"MacroGeneration": 0.08074021339416504,
"MaskPropagation": 0.005038022994995117,
"MemcpyElimination": 0.10875082015991211,
"MutateDataType": 0.0013315677642822266,
"NeuronAliasDependencyInduction": 0.00025200843811035156,
"NeuronAliasDependencyReset": 0.021958112716674805,
"NeuronInstComb": 0.009703636169433594,
"NeuronLICM": 0.011526823043823242,
"NeuronLoopFusion": 0.017663955688476563,
"NeuronLoopInterchange": 0.002567291259765625,
"NeuronSimplifier": 0.011670589447021484,
"NeuronSimplifyPredicates": 0.017385244369506836,
"NeuronValueNumbering": 0.004181623458862305,
"OptimizeAliasedCopyChain": 0.0017867088317871094,
"OptimizeNKIKernels": 0.0020456314086914063,
"PAGLayoutOpt": 0.3681519031524658,
"PComputeCutting": 0.008620262145996094,
"PGLayoutTilingPipeline": 1.3210320472717285,
"PGTiling": 0.27039527893066406,
"PadElimination": 0.0003745555877685547,
"ParAxesAnnotation": 0.33005595207214355,
"PartialLoopFusion": 0.026912212371826172,
"PartialSimdFusion": 0.03544425964355469,
"PerfectLoopNest": 0.0021703243255615234,
"RecognizeOpIdiom": 0.004334926605224609,
"Recompute": 0.0002522468566894531,
"RelaxPredicates": 0.004270076751708984,
"Rematerialization": 0.005487918853759766,
"ReshapeWeights": 0.0006825923919677734,
"ResolveAccessConflict": 0.003779888153076172,
"ResolveComplicatePredicates": 0.0018131732940673828,
"RewriteReplicationMatmul": 0.002633333206176758,
"RewriteWeights": 0.0036499500274658203,
"SFKVectorizer": 0.2772994041442871,
"SimpleAllReduceTiling": 0.002454519271850586,
"Simplifier": 0.0045070648193359375,
"SimplifyMacroPredicates": 0.016190290451049805,
"SimplifyNeuronTensor": 0.01452183723449707,
"SimplifySlice": 0.0010039806365966797,
"SimplifyTensor": 0.00657200813293457,
"SpillPSum": 0.02208685874938965,
"SplitAPUnionSets": 0.04095458984375,
"SplitAccGrp": 0.0018160343170166016,
"StaticProfiler": 0.004816770553588867,
"StaticTransposeLocalTensor": 0.004886150360107422,
"SundaISel": 0.04611611366271973,
"TCTransform": 0.001667022705078125,
"TensorInitialization": 0.022374629974365234,
"TensorOpSimplifier": 0.006697177886962891,
"TensorOpTransform": 0.02793574333190918,
"TileCCOps": 0.007641792297363281,
"TilingProfiler": 0.015750885009765625,
"TransformConvOp": 0.0026845932006835938,
"TritiumFusion": 0.08186149597167969,
"ValueNumbering": 0.0026755332946777344,
"VectorizeDMA": 0.007223367691040039,
"VectorizeMatMult": 0.018305540084838867,
"WeightCoalescing": 0.003328561782836914,
"ZeroSizeTensorElimination": 0.00011229515075683594
},
"tensorizer": {
"DMATilingProfiler::TotalInstructionsAfterTiling": 5862.0,
"StaticProfiler::AifUb": 88.59026336669922,
"StaticProfiler::ArithmeticIntensityTensorizer": 582.7418823242188,
"StaticProfiler::AverageDmaLength": 2248.2685546875,
"StaticProfiler::AverageFractalPeUtilization": 99.96076202392578,
"StaticProfiler::AveragePartitionUtilization": 99.90216827392578,
"StaticProfiler::AveragePeUtilization": 99.8394546508789,
"StaticProfiler::DDRTransferBytes": 104424704.0,
"StaticProfiler::InternalTransferBytes": 122421248.0,
"StaticProfiler::LoadExpanded": 25346.0,
"StaticProfiler::LocalizationEfficiency": 657.7944946289063,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 915.0787353515625,
"StaticProfiler::StoreExpanded": 10753.0,
"StaticProfiler::TotalDMAExpanded": 36099.0,
"StaticProfiler::TotalDynamicInstancesCount": 8866.0,
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 8860.0,
"StaticProfiler::TotalLNCComm": 0.0,
"StaticProfiler::TotalLNCCommTransfer": 0.0,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
"TilingProfiler::GenericInstructionsAfterTiling": 96.0,
"TilingProfiler::MatMultInstructionsAfterTiling": 3080.0,
"TilingProfiler::NumPfTransposes": 8.0,
"TilingProfiler::NumPfTransposesForIo": 1.0,
"TilingProfiler::NumPfTransposesForLocal": 6.0,
"TilingProfiler::NumPfTransposesForNonlocal": 1.0,
"TilingProfiler::PfTransposeInstructions": 1760.0,
"TilingProfiler::PfTransposeInstructionsForIo": 256.0,
"TilingProfiler::PfTransposeInstructionsForLocal": 1376.0,
"TilingProfiler::PfTransposeInstructionsForNonlocal": 128.0,
"TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
"TilingProfiler::SimdInstructionsAfterTiling": 649.0,
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
"TransformConvOp::conv2d_column_packing": 0.0,
"TransformConvOp::conv2d_column_packing_1": 0.0,
"TransformConvOp::conv2d_column_packing_io10": 0.0,
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
}
},
"sg0001": {
"compiletime": {
"AGOrderingAnalysisPass": 0.03383040428161621,
"AffinePredicateResolution": 0.0015320777893066406,
"AliasDependencyElimination": 0.0001316070556640625,
"AliasDependencyInduction": 0.00819253921508789,
"AliasDependencyReset": 0.02862405776977539,
"BFComputeCutting": 0.004217624664306641,
"BirCodeGenLoop": 0.0443270206451416,
"CCOpFusion": 0.04336118698120117,
"CanonicalizeDAGForPGTiling": 0.0031616687774658203,
"CanonicalizeIR": 0.0021500587463378906,
"CoalesceCCOp": 0.005389690399169922,
"CommuteConcat": 0.0024237632751464844,
"DMALocalityOpt": 0.002274751663208008,
"DMAProfiler": 0.003973484039306641,
"DMATilingProfiler": 0.005924701690673828,
"DataLocalityOpt": 0.15027260780334473,
"DataStreaming": 0.004762887954711914,
"DeConcat": 0.0018739700317382813,
"DeadCodeElimination": 0.001882314682006836,
"DeadStoreElimination": 0.03486776351928711,
"DelinearIndices": 0.009628534317016602,
"Delinearization": 0.0037381649017333984,
"DoNothing": 6.985664367675781e-05,
"DramToDramTranspose": 0.04212188720703125,
"DumpGraphAndMetadata": 0.004312038421630859,
"EliminateDivs": 0.005432844161987305,
"ExpandBatchNorm": 0.002119302749633789,
"ExpandISAMacro": 0.0024309158325195313,
"FactorizeBlkDims": 0.02235579490661621,
"FactorizeThreadAxesInFreeDims": 0.0018169879913330078,
"FlattenMacroLoop": 0.0030968189239501953,
"GenericAccessSimplifier": 0.0016777515411376953,
"InferInitValue": 0.043079376220703125,
"InferIntrinsicOnCC": 0.009890556335449219,
"InferNeuronTensor": 0.05600404739379883,
"InferNonlocalTensors": 0.03101515769958496,
"InferPSumTensor": 0.04645681381225586,
"InlineNativeKernels": 0.0015399456024169922,
"InsertIOTransposes": 0.02417731285095215,
"InsertLocalTransposes": 0.0070497989654541016,
"InsertOffloadedTransposes": 0.003525972366333008,
"LICM": 0.0035805702209472656,
"LateLegalizeInst": 0.0041539669036865234,
"LateLegalizePostSplit": 0.0027403831481933594,
"LateLowerReshapeOp": 0.0014560222625732422,
"LateLowerTensorOp": 0.004617452621459961,
"LateNeuronInstComb": 0.015344619750976563,
"LayoutPreprocessing": 0.030884981155395508,
"LayoutPreprocessingAndAnalysis": 0.06435275077819824,
"LayoutRequirementAnalysis": 0.007463693618774414,
"LegalizeCCOpLayout": 0.002064943313598633,
"LegalizeOpLevelAlias": 0.0011925697326660156,
"LegalizePartitionReduce": 0.0026116371154785156,
"LegalizeSundaAccess": 0.015822887420654297,
"LegalizeSundaMacro": 0.012560844421386719,
"LegalizeType": 0.004744291305541992,
"LocalLayoutOpt": 0.023772239685058594,
"LoopFusion": 0.0066835880279541016,
"LoopSplitting": 0.0003638267517089844,
"LowerBroadcast": 0.002238750457763672,
"LowerCCOpBlockAxis": 0.005678653717041016,
"LowerComplexBroadcast": 0.0019271373748779297,
"LowerIntrinsics": 0.042801856994628906,
"LowerTensorOp": 0.012106895446777344,
"LowerTranspose": 0.012960433959960938,
"MacroGeneration": 0.12800955772399902,
"MaskPropagation": 0.0031516551971435547,
"MemcpyElimination": 0.10379505157470703,
"MutateDataType": 0.0014393329620361328,
"NeuronAliasDependencyInduction": 0.00022101402282714844,
"NeuronAliasDependencyReset": 0.020102262496948242,
"NeuronInstComb": 0.009283781051635742,
"NeuronLICM": 0.009867429733276367,
"NeuronLoopFusion": 0.022713661193847656,
"NeuronLoopInterchange": 0.002709627151489258,
"NeuronSimplifier": 0.01328134536743164,
"NeuronSimplifyPredicates": 0.001683950424194336,
"NeuronValueNumbering": 0.0033235549926757813,
"OptimizeAliasedCopyChain": 0.0007724761962890625,
"OptimizeNKIKernels": 0.001729726791381836,
"PAGLayoutOpt": 0.13172507286071777,
"PComputeCutting": 0.007474422454833984,
"PGLayoutTilingPipeline": 0.9329550266265869,
"PGTiling": 0.4518747329711914,
"PadElimination": 0.00040411949157714844,
"ParAxesAnnotation": 0.0915369987487793,
"PartialLoopFusion": 0.020573854446411133,
"PartialSimdFusion": 0.04284977912902832,
"PerfectLoopNest": 0.002377033233642578,
"RecognizeOpIdiom": 0.0049991607666015625,
"Recompute": 0.00026345252990722656,
"RelaxPredicates": 0.0034220218658447266,
"Rematerialization": 0.0021615028381347656,
"ReshapeWeights": 0.0007557868957519531,
"ResolveAccessConflict": 0.004181861877441406,
"ResolveComplicatePredicates": 0.0015151500701904297,
"RewriteReplicationMatmul": 0.0020759105682373047,
"RewriteWeights": 0.0036649703979492188,
"SFKVectorizer": 0.20148277282714844,
"SimpleAllReduceTiling": 0.003732442855834961,
"Simplifier": 0.004697084426879883,
"SimplifyMacroPredicates": 0.007361888885498047,
"SimplifyNeuronTensor": 0.009825944900512695,
"SimplifySlice": 0.0017888545989990234,
"SimplifyTensor": 0.006832122802734375,
"SpillPSum": 0.022799968719482422,
"SplitAPUnionSets": 0.020108938217163086,
"SplitAccGrp": 0.0015766620635986328,
"StaticProfiler": 0.004146099090576172,
"StaticTransposeLocalTensor": 0.004926919937133789,
"SundaISel": 0.04472494125366211,
"TCTransform": 0.0018138885498046875,
"TensorInitialization": 0.004791736602783203,
"TensorOpSimplifier": 0.0064849853515625,
"TensorOpTransform": 0.0333099365234375,
"TileCCOps": 0.0056035518646240234,
"TilingProfiler": 0.01600933074951172,
"TransformConvOp": 0.002446413040161133,
"TritiumFusion": 0.1239166259765625,
"ValueNumbering": 0.0030901432037353516,
"VectorizeDMA": 0.0017311573028564453,
"VectorizeMatMult": 0.018932580947875977,
"WeightCoalescing": 0.0027513504028320313,
"ZeroSizeTensorElimination": 0.00011587142944335938
},
"tensorizer": {
"DMATilingProfiler::TotalInstructionsAfterTiling": 15811.0,
"StaticProfiler::AifUb": 934.4357299804688,
"StaticProfiler::ArithmeticIntensityTensorizer": 708.8487548828125,
"StaticProfiler::AverageDmaLength": 1109.3380126953125,
"StaticProfiler::AverageFractalPeUtilization": 100.0,
"StaticProfiler::AveragePartitionUtilization": 99.8372802734375,
"StaticProfiler::AveragePeUtilization": 100.0,
"StaticProfiler::DDRTransferBytes": 306283520.0,
"StaticProfiler::InternalTransferBytes": 104595456.0,
"StaticProfiler::LoadExpanded": 257536.0,
"StaticProfiler::LocalizationEfficiency": 75.85848236083984,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 85.1915054321289,
"StaticProfiler::StoreExpanded": 10241.0,
"StaticProfiler::TotalDMAExpanded": 267777.0,
"StaticProfiler::TotalDynamicInstancesCount": 19667.0,
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 19667.0,
"StaticProfiler::TotalLNCComm": 0.0,
"StaticProfiler::TotalLNCCommTransfer": 0.0,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
"TilingProfiler::GenericInstructionsAfterTiling": 64.0,
"TilingProfiler::MatMultInstructionsAfterTiling": 12288.0,
"TilingProfiler::NumPfTransposes": 9.0,
"TilingProfiler::NumPfTransposesForIo": 3.0,
"TilingProfiler::NumPfTransposesForLocal": 4.0,
"TilingProfiler::NumPfTransposesForNonlocal": 2.0,
"TilingProfiler::PfTransposeInstructions": 1904.0,
"TilingProfiler::PfTransposeInstructionsForIo": 272.0,
"TilingProfiler::PfTransposeInstructionsForLocal": 1120.0,
"TilingProfiler::PfTransposeInstructionsForNonlocal": 512.0,
"TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
"TilingProfiler::SimdInstructionsAfterTiling": 704.0,
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
"TransformConvOp::conv2d_column_packing": 0.0,
"TransformConvOp::conv2d_column_packing_1": 0.0,
"TransformConvOp::conv2d_column_packing_io10": 0.0,
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
}
},
"sg0002": {
"compiletime": {
"AGOrderingAnalysisPass": 0.01837611198425293,
"AffinePredicateResolution": 0.0011184215545654297,
"AliasDependencyElimination": 0.00015664100646972656,
"AliasDependencyInduction": 0.005170583724975586,
"AliasDependencyReset": 0.027508020401000977,
"BFComputeCutting": 0.0036101341247558594,
"BirCodeGenLoop": 0.4774467945098877,
"CCOpFusion": 0.033265113830566406,
"CanonicalizeDAGForPGTiling": 0.004282712936401367,
"CanonicalizeIR": 0.0024569034576416016,
"CoalesceCCOp": 0.013993978500366211,
"CommuteConcat": 0.0017316341400146484,
"DMALocalityOpt": 0.005455970764160156,
"DMAProfiler": 0.012103080749511719,
"DMATilingProfiler": 0.0037560462951660156,
"DataLocalityOpt": 0.07645320892333984,
"DataStreaming": 0.03701519966125488,
"DeConcat": 0.0018520355224609375,
"DeadCodeElimination": 0.0020148754119873047,
"DeadStoreElimination": 0.006912708282470703,
"DelinearIndices": 0.004647254943847656,
"Delinearization": 0.003908872604370117,
"DoNothing": 7.414817810058594e-05,
"DramToDramTranspose": 0.02015542984008789,
"DumpGraphAndMetadata": 0.08691883087158203,
"EliminateDivs": 0.0025060176849365234,
"ExpandBatchNorm": 0.0027189254760742188,
"ExpandISAMacro": 0.010967254638671875,
"FactorizeBlkDims": 0.009678840637207031,
"FactorizeThreadAxesInFreeDims": 0.0023202896118164063,
"FlattenMacroLoop": 0.00232696533203125,
"GenericAccessSimplifier": 0.0008094310760498047,
"InferInitValue": 0.02833867073059082,
"InferIntrinsicOnCC": 0.008923768997192383,
"InferNeuronTensor": 0.025766372680664063,
"InferNonlocalTensors": 0.014599800109863281,
"InferPSumTensor": 0.28374218940734863,
"InlineNativeKernels": 0.00860905647277832,
"InsertIOTransposes": 0.01989889144897461,
"InsertLocalTransposes": 0.004229307174682617,
"InsertOffloadedTransposes": 0.0029871463775634766,
"LICM": 0.0030870437622070313,
"LateLegalizeInst": 0.01364445686340332,
"LateLegalizePostSplit": 0.014872312545776367,
"LateLowerReshapeOp": 0.0010464191436767578,
"LateLowerTensorOp": 0.002707242965698242,
"LateNeuronInstComb": 0.01008152961730957,
"LayoutPreprocessing": 0.026853561401367188,
"LayoutPreprocessingAndAnalysis": 0.0556035041809082,
"LayoutRequirementAnalysis": 0.004946470260620117,
"LegalizeCCOpLayout": 0.0025353431701660156,
"LegalizeOpLevelAlias": 0.0018966197967529297,
"LegalizePartitionReduce": 0.0017490386962890625,
"LegalizeSundaAccess": 0.0763850212097168,
"LegalizeSundaMacro": 0.012125253677368164,
"LegalizeType": 0.012414693832397461,
"LocalLayoutOpt": 0.013860225677490234,
"LoopFusion": 0.005201578140258789,
"LoopSplitting": 0.0003204345703125,
"LowerBroadcast": 0.0018322467803955078,
"LowerCCOpBlockAxis": 0.0040171146392822266,
"LowerComplexBroadcast": 0.002280712127685547,
"LowerIntrinsics": 0.3141806125640869,
"LowerTensorOp": 0.01141357421875,
"LowerTranspose": 0.012679815292358398,
"MacroGeneration": 0.034410953521728516,
"MaskPropagation": 0.0028192996978759766,
"MemcpyElimination": 0.02788853645324707,
"MutateDataType": 0.0012311935424804688,
"NeuronAliasDependencyInduction": 0.0001773834228515625,
"NeuronAliasDependencyReset": 0.024976015090942383,
"NeuronInstComb": 0.004675865173339844,
"NeuronLICM": 0.03631091117858887,
"NeuronLoopFusion": 0.008457422256469727,
"NeuronLoopInterchange": 0.001413106918334961,
"NeuronSimplifier": 0.007856369018554688,
"NeuronSimplifyPredicates": 0.11957573890686035,
"NeuronValueNumbering": 0.004334449768066406,
"OptimizeAliasedCopyChain": 0.0006341934204101563,
"OptimizeNKIKernels": 0.38834357261657715,
"PAGLayoutOpt": 0.0889735221862793,
"PComputeCutting": 0.005109071731567383,
"PGLayoutTilingPipeline": 0.6248171329498291,
"PGTiling": 0.1645822525024414,
"PadElimination": 0.0003485679626464844,
"ParAxesAnnotation": 0.05196070671081543,
"PartialLoopFusion": 0.011112451553344727,
"PartialSimdFusion": 0.012138128280639648,
"PerfectLoopNest": 0.002288341522216797,
"RecognizeOpIdiom": 0.0041277408599853516,
"Recompute": 0.00026416778564453125,
"RelaxPredicates": 0.01356959342956543,
"Rematerialization": 0.0024864673614501953,
"ReshapeWeights": 0.0007522106170654297,
"ResolveAccessConflict": 0.0048482418060302734,
"ResolveComplicatePredicates": 0.0015094280242919922,
"RewriteReplicationMatmul": 0.0015668869018554688,
"RewriteWeights": 0.0027174949645996094,
"SFKVectorizer": 0.27501797676086426,
"SimpleAllReduceTiling": 0.009322166442871094,
"Simplifier": 0.003630399703979492,
"SimplifyMacroPredicates": 0.011396646499633789,
"SimplifyNeuronTensor": 1.0555970668792725,
"SimplifySlice": 0.0023348331451416016,
"SimplifyTensor": 0.005601167678833008,
"SpillPSum": 0.013073921203613281,
"SplitAPUnionSets": 0.11336159706115723,
"SplitAccGrp": 0.001394510269165039,
"StaticProfiler": 0.014252662658691406,
"StaticTransposeLocalTensor": 0.003930330276489258,
"SundaISel": 0.04436635971069336,
"TCTransform": 0.0008757114410400391,
"TensorInitialization": 0.01558232307434082,
"TensorOpSimplifier": 0.004608869552612305,
"TensorOpTransform": 0.01923346519470215,
"TileCCOps": 0.005507707595825195,
"TilingProfiler": 0.007405757904052734,
"TransformConvOp": 0.0030219554901123047,
"TritiumFusion": 0.05425119400024414,
"ValueNumbering": 0.0020017623901367188,
"VectorizeDMA": 0.002228975296020508,
"VectorizeMatMult": 0.006806135177612305,
"WeightCoalescing": 0.008460044860839844,
"ZeroSizeTensorElimination": 0.00014281272888183594
},
"tensorizer": {
"DMATilingProfiler::TotalInstructionsAfterTiling": 49538.0,
"StaticProfiler::AifUb": 304.240234375,
"StaticProfiler::ArithmeticIntensityTensorizer": 278.67474365234375,
"StaticProfiler::AverageDmaLength": 1974.1033935546875,
"StaticProfiler::AverageFractalPeUtilization": 99.7004623413086,
"StaticProfiler::AveragePartitionUtilization": 97.94140625,
"StaticProfiler::AveragePeUtilization": 98.78884887695313,
"StaticProfiler::DDRTransferBytes": 862646080.0,
"StaticProfiler::InternalTransferBytes": 669456896.0,
"StaticProfiler::LoadExpanded": 390679.0,
"StaticProfiler::LocalizationEfficiency": 91.59693145751953,
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 95.863037109375,
"StaticProfiler::StoreExpanded": 7261.0,
"StaticProfiler::TotalDMAExpanded": 397940.0,
"StaticProfiler::TotalDynamicInstancesCount": 59578.0,
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 59132.0,
"StaticProfiler::TotalLNCComm": 0.0,
"StaticProfiler::TotalLNCCommTransfer": 0.0,
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
"TilingProfiler::GenericInstructionsAfterTiling": 4.0,
"TilingProfiler::MatMultInstructionsAfterTiling": 28224.0,
"TilingProfiler::NumPfTransposes": 5.0,
"TilingProfiler::NumPfTransposesForIo": 1.0,
"TilingProfiler::NumPfTransposesForLocal": 1.0,
"TilingProfiler::NumPfTransposesForNonlocal": 3.0,
"TilingProfiler::PfTransposeInstructions": 19777.0,
"TilingProfiler::PfTransposeInstructionsForIo": 19008.0,
"TilingProfiler::PfTransposeInstructionsForLocal": 1.0,
"TilingProfiler::PfTransposeInstructionsForNonlocal": 768.0,
"TilingProfiler::ReduceInstructionsAfterTiling": 6.0,
"TilingProfiler::SimdInstructionsAfterTiling": 303.0,
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
"TransformConvOp::conv2d_column_packing": 0.0,
"TransformConvOp::conv2d_column_packing_1": 0.0,
"TransformConvOp::conv2d_column_packing_io10": 0.0,
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
}
},
"sg01": {
"compiletime": {
"CanonicalizeConv": 2.2000000171829015e-05,
"CanonicalizeForTensorizer": 1.4999999621068127e-05,
"Canonicalizer": 0.00025499999173916876,
"HoistCompute": 1.9999999949504854e-06,
"IdentifyCrossPassTensors": 2.499999936844688e-05,
"MemcastMotion": 7.000000096013537e-06,
"PenguinizeFunctions": 1.4999999621068127e-05,
"PruneFunctions": 1.8999999156221747e-05,
"RemoveOptimizationBarriers": 2.700000004551839e-05,
"ScatterMotion": 1.9999999494757503e-05,
"TensorizerLegalizationPass": 1.9999999494757503e-05,
"VerifySupportedOps": 1.1000000085914508e-05,
"algsimp": 6.299999949987978e-05,
"batchnorm_expander": 1.4000000192027073e-05,
"boundary-marker-removal": 4.999999873689376e-06,
"call-inliner": 9.000000318337698e-06,
"canonicalize-boundary-marker": 6.000000212225132e-06,
"collective-stream-id-checker": 4.999999873689376e-06,
"comparison-expander": 4.999999873689376e-06,
"computation-deduplicator": 2.5999999706982635e-05,
"conditional-to-select": 4.999999873689376e-06,
"config-lowering": 2.2000000171829015e-05,
"constant_folding": 9.000000318337698e-06,
"cse": 1.2000000424450263e-05,
"dce": 9.999999974752427e-07,
"dynamic-slice-transpose": 3.999999989900971e-06,
"eliminate-redundant-compare": 3.999999989900971e-06,
"emit-offloaded-dropout": 1.2999999853491317e-05,
"flatten-call-graph": 9.000000318337698e-06,
"fuse-send-recv": 2.9999999242136255e-05,
"hilo::LegalizeAlias": 4.999999873689376e-06,
"hilo::NeuronInstCombine": 3.600000127335079e-05,
"hilo::NeuronOpFusion": 1.4000000192027073e-05,
"hilo::ReplaceTokenTypeWithU8Pass": 2.099999983329326e-05,
"hilo::ScheduleFusion": 9.999999974752427e-07,
"hilo::SixtyFourHack": 1.4000000192027073e-05,
"hilo::VerifyAliasing": 1.9999999949504854e-06,
"hlo-mac-count": 4.600000102072954e-05,
"hlo-verifier": 0.00023299999884329736,
"legalize-ccops": 9.999999974752427e-07,
"legalize-compare": 3.999999989900971e-06,
"lower-argminmax-custom-call": 3.999999989900971e-06,
"map-inline": 1.1000000085914508e-05,
"metadata-naming": 2.700000004551839e-05,
"mlir::detail::OpToOpPassAdaptor": 0.00017299999308306724,
"mlir::hlo::MhloToPyPenguin": 0.0009840000420808792,
"mlir::mhlo::LowerComplexExtraPass": 9.600000339560211e-05,
"mlir::mhlo::LowerComplexPass": 0.00013600000238511711,
"native-to-custom-softmax": 6.000000212225132e-06,
"native-to-custom-softmax-dx": 2.2000000171829015e-05,
"operand_upcaster": 2.4000000848900527e-05,
"post-par-pipe-begin": 3.000000106112566e-06,
"post-par-pipe-end": 0.0,
"post-partition-simplification": 0.0005660000024363399,
"replace-minimum-constant": 6.000000212225132e-06,
"reshape-mover": 3.000000106112566e-06,
"simplify-concat": 4.8999998398358e-05,
"simplify-while-loops": 1.9999999949504854e-06,
"transform-variadic-reduce": 9.000000318337698e-06,
"tuple-simplifier": 4.999999873689376e-06,
"unpack-nested-aws-ntwsr": 3.999999989900971e-06,
"unroll-while-loop": 0.0
},
"hilo": {
"ArithmeticIntensity": 834.6854858398438,
"HloMacCount": 103079215104.0,
"Traffic": 246989344.0
}
},
"sg02": {
"compiletime": {
"CanonicalizeConv": 0.0,
"CanonicalizeForTensorizer": 1.4000000192027073e-05,
"Canonicalizer": 0.0003129999968223274,
"HoistCompute": 9.999999974752427e-07,
"IdentifyCrossPassTensors": 1.2000000424450263e-05,
"MemcastMotion": 0.0,
"PenguinizeFunctions": 9.999999747378752e-06,
"PruneFunctions": 7.999999979801942e-06,
"RemoveOptimizationBarriers": 9.000000318337698e-06,
"ScatterMotion": 3.000000106112566e-06,
"TensorizerLegalizationPass": 6.000000212225132e-06,
"VerifySupportedOps": 1.2000000424450263e-05,
"algsimp": 5.999999848427251e-05,
"batchnorm_expander": 1.2999999853491317e-05,
"boundary-marker-removal": 3.999999989900971e-06,
"call-inliner": 9.999999747378752e-06,
"canonicalize-boundary-marker": 4.999999873689376e-06,
"collective-stream-id-checker": 3.999999989900971e-06,
"comparison-expander": 4.999999873689376e-06,
"computation-deduplicator": 2.5999999706982635e-05,
"conditional-to-select": 7.000000096013537e-06,
"config-lowering": 2.5999999706982635e-05,
"constant_folding": 9.000000318337698e-06,
"cse": 1.2000000424450263e-05,
"dce": 9.999999974752427e-07,
"dynamic-slice-transpose": 3.999999989900971e-06,
"eliminate-redundant-compare": 3.000000106112566e-06,
"emit-offloaded-dropout": 1.2999999853491317e-05,
"flatten-call-graph": 1.2000000424450263e-05,
"fuse-send-recv": 1.9999999494757503e-05,
"hilo::LegalizeAlias": 1.9999999949504854e-06,
"hilo::NeuronInstCombine": 6.800000119255856e-05,
"hilo::NeuronOpFusion": 3.999999989900971e-06,
"hilo::ReplaceTokenTypeWithU8Pass": 2.2000000171829015e-05,
"hilo::ScheduleFusion": 0.00015900000289548188,
"hilo::SixtyFourHack": 3.9999998989515007e-05,
"hilo::VerifyAliasing": 9.999999974752427e-07,
"hlo-mac-count": 0.00017699999443721026,
"hlo-verifier": 0.0001829999964684248,
"legalize-ccops": 1.9999999949504854e-06,
"legalize-compare": 3.000000106112566e-06,
"lower-argminmax-custom-call": 3.000000106112566e-06,
"map-inline": 1.2000000424450263e-05,
"metadata-naming": 1.5999999959603883e-05,
"mlir::detail::OpToOpPassAdaptor": 1.1000000085914508e-05,
"mlir::hlo::MhloToPyPenguin": 0.0008900000248104334,
"mlir::mhlo::LowerComplexExtraPass": 8.800000068731606e-05,
"mlir::mhlo::LowerComplexPass": 0.00011999999696854502,
"native-to-custom-softmax": 6.000000212225132e-06,
"native-to-custom-softmax-dx": 2.4000000848900527e-05,
"operand_upcaster": 1.5999999959603883e-05,
"post-par-pipe-begin": 1.9999999949504854e-06,
"post-par-pipe-end": 0.0,
"post-partition-simplification": 0.0005510000046342611,
"replace-minimum-constant": 9.000000318337698e-06,
"reshape-mover": 3.000000106112566e-06,
"simplify-concat": 4.400000034365803e-05,
"simplify-while-loops": 1.9999999949504854e-06,
"transform-variadic-reduce": 6.299999949987978e-05,
"tuple-simplifier": 4.999999873689376e-06,
"unpack-nested-aws-ntwsr": 3.999999989900971e-06,
"unroll-while-loop": 0.0
},
"hilo": {
"ArithmeticIntensity": 194.41075134277344,
"HloMacCount": 77620576256.0,
"Traffic": 798521408.0
}
}
}